author     Stephen Hines <srhines@google.com>   2014-02-11 20:01:10 -0800
committer  Stephen Hines <srhines@google.com>   2014-02-11 20:01:10 -0800
commit     ce9904c6ea8fd669978a8eefb854b330eb9828ff (patch)
tree       2418ee2e96ea220977c8fb74959192036ab5b133 /lib
parent     c27b10b198c1d9e9b51f2303994313ec2778edd7 (diff)
parent     dbb832b83351cec97b025b61c26536ef50c3181c (diff)
Merge remote-tracking branch 'upstream/release_34' into merge-20140211
Conflicts:
    lib/Linker/LinkModules.cpp
    lib/Support/Unix/Signals.inc

Change-Id: Ia54f291fa5dc828052d2412736e8495c1282aa64
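The two files listed under Conflicts: are the ones that needed manual resolution when upstream/release_34 was merged into the merge-20140211 branch. The exact commands used are not recorded on this page; the following is only a hypothetical sketch of how such a merge is typically performed, and the Gerrit commit-msg hook detail is an assumption based on the Change-Id line:

    # Hypothetical reconstruction of the merge workflow; only the branch
    # names upstream/release_34 and merge-20140211 come from the commit itself.
    git fetch upstream
    git checkout -b merge-20140211
    git merge upstream/release_34
    # resolve the two conflicting files by hand, then stage and commit:
    git add lib/Linker/LinkModules.cpp lib/Support/Unix/Signals.inc
    git commit   # in a Gerrit setup, the commit-msg hook appends the Change-Id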
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/AliasSetTracker.cpp2
-rw-r--r--lib/Analysis/Analysis.cpp11
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp99
-rw-r--r--lib/Analysis/BlockFrequencyInfo.cpp113
-rw-r--r--lib/Analysis/BranchProbabilityInfo.cpp22
-rw-r--r--lib/Analysis/CFG.cpp128
-rw-r--r--lib/Analysis/CMakeLists.txt11
-rw-r--r--lib/Analysis/CaptureTracking.cpp6
-rw-r--r--lib/Analysis/ConstantFolding.cpp228
-rw-r--r--lib/Analysis/CostModel.cpp271
-rw-r--r--lib/Analysis/Delinearization.cpp133
-rw-r--r--lib/Analysis/DependenceAnalysis.cpp76
-rw-r--r--lib/Analysis/IPA/CallGraph.cpp230
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp2
-rw-r--r--lib/Analysis/IPA/GlobalsModRef.cpp2
-rw-r--r--lib/Analysis/IPA/IPA.cpp3
-rw-r--r--lib/Analysis/IPA/InlineCost.cpp147
-rw-r--r--lib/Analysis/InstructionSimplify.cpp20
-rw-r--r--lib/Analysis/Lint.cpp42
-rw-r--r--lib/Analysis/LoopInfo.cpp34
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp97
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp11
-rw-r--r--lib/Analysis/PathNumbering.cpp521
-rw-r--r--lib/Analysis/PathProfileInfo.cpp433
-rw-r--r--lib/Analysis/PathProfileVerifier.cpp206
-rw-r--r--lib/Analysis/ProfileDataLoader.cpp155
-rw-r--r--lib/Analysis/ProfileDataLoaderPass.cpp188
-rw-r--r--lib/Analysis/ProfileEstimatorPass.cpp426
-rw-r--r--lib/Analysis/ProfileInfo.cpp1079
-rw-r--r--lib/Analysis/ProfileInfoLoader.cpp155
-rw-r--r--lib/Analysis/ProfileInfoLoaderPass.cpp267
-rw-r--r--lib/Analysis/ProfileVerifierPass.cpp383
-rw-r--r--lib/Analysis/RegionInfo.cpp6
-rw-r--r--lib/Analysis/ScalarEvolution.cpp1051
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp110
-rw-r--r--lib/Analysis/ScalarEvolutionNormalization.cpp18
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp46
-rw-r--r--lib/Analysis/TypeBasedAliasAnalysis.cpp116
-rw-r--r--lib/Analysis/ValueTracking.cpp43
-rw-r--r--lib/AsmParser/LLLexer.cpp7
-rw-r--r--lib/AsmParser/LLParser.cpp50
-rw-r--r--lib/AsmParser/LLParser.h2
-rw-r--r--lib/AsmParser/LLToken.h7
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp960
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h95
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp127
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp5
-rw-r--r--lib/CMakeLists.txt1
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp2
-rw-r--r--lib/CodeGen/Analysis.cpp66
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp18
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp215
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp6
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt1
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp57
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h89
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.cpp507
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.h147
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp85
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.h122
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp1216
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h304
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp1343
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h118
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h3
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp27
-rw-r--r--lib/CodeGen/BranchFolding.h2
-rw-r--r--lib/CodeGen/CMakeLists.txt4
-rw-r--r--lib/CodeGen/CalcSpillWeights.cpp36
-rw-r--r--lib/CodeGen/CodeGen.cpp2
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp3
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp117
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp2
-rw-r--r--lib/CodeGen/IfConversion.cpp215
-rw-r--r--lib/CodeGen/InlineSpiller.cpp160
-rw-r--r--lib/CodeGen/InterferenceCache.cpp8
-rw-r--r--lib/CodeGen/InterferenceCache.h2
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp5
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp14
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp53
-rw-r--r--lib/CodeGen/LiveDebugVariables.h4
-rw-r--r--lib/CodeGen/LiveInterval.cpp391
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp238
-rw-r--r--lib/CodeGen/LiveRangeCalc.cpp56
-rw-r--r--lib/CodeGen/LiveRangeCalc.h38
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp43
-rw-r--r--lib/CodeGen/LiveRegMatrix.cpp6
-rw-r--r--lib/CodeGen/LiveRegUnits.cpp111
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp6
-rw-r--r--lib/CodeGen/MachineInstr.cpp35
-rw-r--r--lib/CodeGen/MachineLICM.cpp12
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp7
-rw-r--r--lib/CodeGen/MachineScheduler.cpp707
-rw-r--r--lib/CodeGen/MachineSink.cpp30
-rw-r--r--lib/CodeGen/MachineVerifier.cpp275
-rw-r--r--lib/CodeGen/PHIElimination.cpp18
-rw-r--r--lib/CodeGen/PHIEliminationUtils.h2
-rw-r--r--lib/CodeGen/Passes.cpp19
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp250
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp29
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp2
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp220
-rw-r--r--lib/CodeGen/PrologEpilogInserter.h99
-rw-r--r--lib/CodeGen/RegAllocBase.cpp14
-rw-r--r--lib/CodeGen/RegAllocBase.h5
-rw-r--r--lib/CodeGen/RegAllocBasic.cpp15
-rw-r--r--lib/CodeGen/RegAllocFast.cpp11
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp78
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp42
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp132
-rw-r--r--lib/CodeGen/RegisterPressure.cpp431
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp78
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp1111
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp40
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp36
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp374
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp129
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp81
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h17
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp90
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp26
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp268
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp13
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp16
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp506
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp742
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h246
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp261
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp119
-rw-r--r--lib/CodeGen/ShrinkWrapping.cpp1152
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp215
-rw-r--r--lib/CodeGen/Spiller.cpp33
-rw-r--r--lib/CodeGen/SplitKit.cpp57
-rw-r--r--lib/CodeGen/StackColoring.cpp10
-rw-r--r--lib/CodeGen/StackMaps.cpp314
-rw-r--r--lib/CodeGen/StackProtector.cpp446
-rw-r--r--lib/CodeGen/StrongPHIElimination.cpp825
-rw-r--r--lib/CodeGen/TailDuplication.cpp2
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp52
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp65
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp38
-rw-r--r--lib/CodeGen/TargetOptionsImpl.cpp6
-rw-r--r--lib/CodeGen/TargetRegisterInfo.cpp8
-rw-r--r--lib/CodeGen/TargetSchedule.cpp10
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp4
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp4
-rw-r--r--lib/CodeGen/VirtRegMap.cpp46
-rw-r--r--lib/DebugInfo/CMakeLists.txt2
-rw-r--r--lib/DebugInfo/DWARFAbbreviationDeclaration.cpp76
-rw-r--r--lib/DebugInfo/DWARFAbbreviationDeclaration.h29
-rw-r--r--lib/DebugInfo/DWARFAttribute.h30
-rw-r--r--lib/DebugInfo/DWARFCompileUnit.cpp264
-rw-r--r--lib/DebugInfo/DWARFCompileUnit.h111
-rw-r--r--lib/DebugInfo/DWARFContext.cpp266
-rw-r--r--lib/DebugInfo/DWARFContext.h85
-rw-r--r--lib/DebugInfo/DWARFDebugArangeSet.cpp49
-rw-r--r--lib/DebugInfo/DWARFDebugArangeSet.h5
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.cpp206
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.h92
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.cpp302
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.h73
-rw-r--r--lib/DebugInfo/DWARFDebugLine.cpp2
-rw-r--r--lib/DebugInfo/DWARFFormValue.cpp304
-rw-r--r--lib/DebugInfo/DWARFTypeUnit.cpp39
-rw-r--r--lib/DebugInfo/DWARFTypeUnit.h35
-rw-r--r--lib/DebugInfo/DWARFUnit.cpp365
-rw-r--r--lib/DebugInfo/DWARFUnit.h168
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp44
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp45
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h2
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp255
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp31
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h6
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp156
-rw-r--r--lib/ExecutionEngine/JIT/JITMemoryManager.cpp7
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp288
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.h235
-rw-r--r--lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp16
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp66
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp17
-rw-r--r--lib/ExecutionEngine/RTDyldMemoryManager.cpp222
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h1
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h1
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp134
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp312
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h55
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h104
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp167
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h32
-rw-r--r--lib/ExecutionEngine/TargetSelect.cpp2
-rw-r--r--lib/IR/AsmWriter.cpp9
-rw-r--r--lib/IR/AttributeImpl.h3
-rw-r--r--lib/IR/Attributes.cpp7
-rw-r--r--lib/IR/AutoUpgrade.cpp108
-rw-r--r--lib/IR/CMakeLists.txt3
-rw-r--r--lib/IR/ConstantFold.cpp80
-rw-r--r--lib/IR/Constants.cpp27
-rw-r--r--lib/IR/Core.cpp114
-rw-r--r--lib/IR/DIBuilder.cpp358
-rw-r--r--lib/IR/DataLayout.cpp7
-rw-r--r--lib/IR/DebugInfo.cpp478
-rw-r--r--lib/IR/Function.cpp48
-rw-r--r--lib/IR/GCOV.cpp287
-rw-r--r--lib/IR/Globals.cpp16
-rw-r--r--lib/IR/Instruction.cpp25
-rw-r--r--lib/IR/Instructions.cpp328
-rw-r--r--lib/IR/LLVMContextImpl.h5
-rw-r--r--lib/IR/LegacyPassManager.cpp1920
-rw-r--r--lib/IR/Metadata.cpp4
-rw-r--r--lib/IR/Module.cpp42
-rw-r--r--lib/IR/PassManager.cpp1983
-rw-r--r--lib/IR/Type.cpp6
-rw-r--r--lib/IR/TypeFinder.cpp31
-rw-r--r--lib/IR/Value.cpp47
-rw-r--r--lib/IR/ValueTypes.cpp10
-rw-r--r--lib/IR/Verifier.cpp174
-rw-r--r--lib/IRReader/IRReader.cpp32
-rw-r--r--lib/LLVMBuild.txt2
-rw-r--r--lib/LTO/CMakeLists.txt4
-rw-r--r--lib/LTO/LLVMBuild.txt22
-rw-r--r--lib/LTO/LTOCodeGenerator.cpp521
-rw-r--r--lib/LTO/LTOModule.cpp794
-rw-r--r--lib/LTO/Makefile15
-rw-r--r--lib/Linker/LinkModules.cpp43
-rw-r--r--lib/MC/CMakeLists.txt2
-rw-r--r--lib/MC/ELFObjectWriter.cpp66
-rw-r--r--lib/MC/MCAsmInfo.cpp10
-rw-r--r--lib/MC/MCAsmInfoCOFF.cpp1
-rw-r--r--lib/MC/MCAsmInfoDarwin.cpp2
-rw-r--r--lib/MC/MCAsmInfoELF.cpp23
-rw-r--r--lib/MC/MCAsmStreamer.cpp169
-rw-r--r--lib/MC/MCAtom.cpp10
-rw-r--r--lib/MC/MCContext.cpp62
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp142
-rw-r--r--lib/MC/MCDisassembler/Disassembler.h9
-rw-r--r--lib/MC/MCDwarf.cpp23
-rw-r--r--lib/MC/MCELF.cpp7
-rw-r--r--lib/MC/MCELFObjectTargetWriter.cpp6
-rw-r--r--lib/MC/MCELFStreamer.cpp74
-rw-r--r--lib/MC/MCExternalSymbolizer.cpp27
-rw-r--r--lib/MC/MCFunction.cpp44
-rw-r--r--lib/MC/MCInstPrinter.cpp8
-rw-r--r--lib/MC/MCMachOStreamer.cpp33
-rw-r--r--lib/MC/MCModule.cpp55
-rw-r--r--lib/MC/MCModuleYAML.cpp461
-rw-r--r--lib/MC/MCNullStreamer.cpp15
-rw-r--r--lib/MC/MCObjectDisassembler.cpp422
-rw-r--r--lib/MC/MCObjectFileInfo.cpp36
-rw-r--r--lib/MC/MCObjectStreamer.cpp12
-rw-r--r--lib/MC/MCObjectSymbolizer.cpp296
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp58
-rw-r--r--lib/MC/MCParser/AsmParser.cpp1625
-rw-r--r--lib/MC/MCParser/COFFAsmParser.cpp106
-rw-r--r--lib/MC/MCParser/DarwinAsmParser.cpp41
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp121
-rw-r--r--lib/MC/MCPureStreamer.cpp16
-rw-r--r--lib/MC/MCSectionELF.cpp57
-rw-r--r--lib/MC/MCStreamer.cpp103
-rw-r--r--lib/MC/MCSubtargetInfo.cpp9
-rw-r--r--lib/MC/MCSymbol.cpp17
-rw-r--r--lib/MC/MCWin64EH.cpp50
-rw-r--r--lib/MC/MachObjectWriter.cpp117
-rw-r--r--lib/MC/SubtargetFeature.cpp8
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp43
-rw-r--r--lib/MC/WinCOFFStreamer.cpp27
-rw-r--r--lib/Makefile7
-rw-r--r--lib/Object/Binary.cpp4
-rw-r--r--lib/Object/CMakeLists.txt1
-rw-r--r--lib/Object/COFFObjectFile.cpp308
-rw-r--r--lib/Object/ELF.cpp714
-rw-r--r--lib/Object/ELFObjectFile.cpp3
-rw-r--r--lib/Object/ELFYAML.cpp1
-rw-r--r--lib/Object/MachOObjectFile.cpp817
-rw-r--r--lib/Object/MachOUniversal.cpp58
-rw-r--r--lib/Object/ObjectFile.cpp2
-rw-r--r--lib/Object/YAML.cpp1
-rw-r--r--lib/Option/OptTable.cpp73
-rw-r--r--lib/Option/Option.cpp11
-rw-r--r--lib/Support/APFloat.cpp13
-rw-r--r--lib/Support/Allocator.cpp7
-rw-r--r--lib/Support/BlockFrequency.cpp98
-rw-r--r--lib/Support/CMakeLists.txt1
-rw-r--r--lib/Support/CommandLine.cpp3
-rw-r--r--lib/Support/Compression.cpp7
-rw-r--r--lib/Support/ConstantRange.cpp8
-rw-r--r--lib/Support/CrashRecoveryContext.cpp33
-rw-r--r--lib/Support/Dwarf.cpp61
-rw-r--r--lib/Support/DynamicLibrary.cpp41
-rw-r--r--lib/Support/Errno.cpp29
-rw-r--r--lib/Support/ErrorHandling.cpp17
-rw-r--r--lib/Support/GraphWriter.cpp3
-rw-r--r--lib/Support/Host.cpp161
-rw-r--r--lib/Support/Locale.cpp33
-rw-r--r--lib/Support/LocaleGeneric.inc382
-rw-r--r--lib/Support/LocaleWindows.inc15
-rw-r--r--lib/Support/LocaleXlocale.inc61
-rw-r--r--lib/Support/MemoryBuffer.cpp35
-rw-r--r--lib/Support/Path.cpp40
-rw-r--r--lib/Support/PrettyStackTrace.cpp58
-rw-r--r--lib/Support/Process.cpp18
-rw-r--r--lib/Support/Program.cpp36
-rw-r--r--lib/Support/Regex.cpp2
-rw-r--r--lib/Support/SmallPtrSet.cpp9
-rw-r--r--lib/Support/SourceMgr.cpp13
-rw-r--r--lib/Support/StringRef.cpp31
-rw-r--r--lib/Support/TargetRegistry.cpp6
-rw-r--r--lib/Support/ThreadLocal.cpp2
-rw-r--r--lib/Support/Triple.cpp2
-rw-r--r--lib/Support/Unicode.cpp367
-rw-r--r--lib/Support/Unix/Path.inc17
-rw-r--r--lib/Support/Unix/Process.inc116
-rw-r--r--lib/Support/Unix/Program.inc127
-rw-r--r--lib/Support/Unix/Signals.inc4
-rw-r--r--lib/Support/Unix/ThreadLocal.inc2
-rw-r--r--lib/Support/Unix/Unix.h4
-rw-r--r--lib/Support/Windows/DynamicLibrary.inc15
-rw-r--r--lib/Support/Windows/Memory.inc4
-rw-r--r--lib/Support/Windows/Path.inc217
-rw-r--r--lib/Support/Windows/Process.inc114
-rw-r--r--lib/Support/Windows/Program.inc241
-rw-r--r--lib/Support/Windows/RWMutex.inc4
-rw-r--r--lib/Support/Windows/Signals.inc2
-rw-r--r--lib/Support/Windows/TimeValue.inc10
-rw-r--r--lib/Support/Windows/Windows.h27
-rw-r--r--lib/Support/YAMLParser.cpp111
-rw-r--r--lib/Support/YAMLTraits.cpp94
-rw-r--r--lib/Support/raw_ostream.cpp3
-rw-r--r--lib/TableGen/Record.cpp31
-rw-r--r--lib/TableGen/TGParser.cpp3
-rw-r--r--lib/Target/AArch64/AArch64.td7
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp130
-rw-r--r--lib/Target/AArch64/AArch64CallingConv.td9
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp1027
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp1183
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h86
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td451
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp92
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td50
-rw-r--r--lib/Target/AArch64/AArch64InstrNEON.td7329
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp2
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td164
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp26
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h19
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp353
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp762
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp54
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp11
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp5
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h10
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp56
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h5
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h59
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp7
-rw-r--r--lib/Target/ARM/ARM.td81
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp425
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp299
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h20
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp44
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h12
-rw-r--r--lib/Target/ARM/ARMBuildAttrs.h71
-rw-r--r--lib/Target/ARM/ARMCallingConv.td22
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp50
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h33
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp75
-rw-r--r--lib/Target/ARM/ARMFPUName.def32
-rw-r--r--lib/Target/ARM/ARMFPUName.h26
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp104
-rw-r--r--lib/Target/ARM/ARMFeatures.h93
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp150
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp387
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp1026
-rw-r--r--lib/Target/ARM/ARMISelLowering.h29
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td76
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp17
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td603
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td227
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td57
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td430
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td165
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp82
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp2
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h61
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td3
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td132
-rw-r--r--lib/Target/ARM/ARMScheduleSwift.td13
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp68
-rw-r--r--lib/Target/ARM/ARMSubtarget.h59
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp7
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp2
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp94
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp516
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp89
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp167
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp36
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h18
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp540
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h27
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp21
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp64
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h12
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp129
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp30
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp8
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp63
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp17
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp25
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt1
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp37
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h7
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td2
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h1
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td1
-rw-r--r--lib/Target/Hexagon/HexagonMCInstLower.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp16
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.h6
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.h2
-rw-r--r--lib/Target/Hexagon/HexagonPeephole.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp8
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp24
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp16
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp1
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h5
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp3
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h4
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp2
-rw-r--r--lib/Target/MSP430/MSP430CallingConv.td7
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h4
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp1
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp128
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp5
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h1
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.cpp2
-rw-r--r--lib/Target/Mangler.cpp113
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp1314
-rw-r--r--lib/Target/Mips/CMakeLists.txt1
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp347
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp20
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp43
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp39
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp93
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h43
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h39
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp140
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp39
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h16
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp67
-rw-r--r--lib/Target/Mips/MSA.txt78
-rw-r--r--lib/Target/Mips/MicroMipsInstrFormats.td196
-rw-r--r--lib/Target/Mips/MicroMipsInstrInfo.td182
-rw-r--r--lib/Target/Mips/Mips.h1
-rw-r--r--lib/Target/Mips/Mips.td2
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h2
-rw-r--r--lib/Target/Mips/Mips16HardFloat.cpp64
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.cpp9
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp61
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp62
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h2
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td148
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td213
-rw-r--r--lib/Target/Mips/MipsAnalyzeImmediate.h10
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp118
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h24
-rw-r--r--lib/Target/Mips/MipsCallingConv.td20
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp37
-rw-r--r--lib/Target/Mips/MipsCondMov.td81
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp1469
-rw-r--r--lib/Target/Mips/MipsDSPInstrInfo.td468
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp29
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp78
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.h38
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp562
-rw-r--r--lib/Target/Mips/MipsISelLowering.h151
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td326
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td66
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp11
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h1
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td661
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp9
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp5
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h4
-rw-r--r--lib/Target/Mips/MipsMSAInstrFormats.td406
-rw-r--r--lib/Target/Mips/MipsMSAInstrInfo.td3694
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp66
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h103
-rw-r--r--lib/Target/Mips/MipsOs16.cpp45
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp57
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h7
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td219
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp93
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h2
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp429
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.h44
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp2269
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.h48
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp266
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h14
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp78
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp24
-rw-r--r--lib/Target/Mips/MipsSubtarget.h19
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp14
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h44
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp10
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h3
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp215
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h16
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp17
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp101
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h1
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp6
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.h1
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td14
-rw-r--r--lib/Target/NVPTX/NVPTXSection.h4
-rw-r--r--lib/Target/NVPTX/NVPTXSplitBBatBar.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp3
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp28
-rw-r--r--lib/Target/NVPTX/NVPTXTargetObjectFile.h24
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp4
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp44
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp22
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp44
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h3
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp9
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp49
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp389
-rw-r--r--lib/Target/PowerPC/PPC.td51
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp58
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp14
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td31
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp1946
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp540
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp4
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp184
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h2
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td41
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td30
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td33
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp9
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h1
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td117
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp4
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp10
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h4
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td8
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td841
-rw-r--r--lib/Target/PowerPC/PPCScheduleE500mc.td2
-rw-r--r--lib/Target/PowerPC/PPCScheduleE5500.td1
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp54
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h15
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h23
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp9
-rw-r--r--lib/Target/R600/AMDGPU.h3
-rw-r--r--lib/Target/R600/AMDGPU.td12
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp67
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.h12
-rw-r--r--lib/Target/R600/AMDGPUCallingConv.td35
-rw-r--r--lib/Target/R600/AMDGPUISelDAGToDAG.cpp536
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp360
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h37
-rw-r--r--lib/Target/R600/AMDGPUIndirectAddressing.cpp345
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.cpp117
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h45
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.td14
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td192
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.cpp43
-rw-r--r--lib/Target/R600/AMDGPUMachineFunction.cpp3
-rw-r--r--lib/Target/R600/AMDGPUMachineFunction.h5
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.cpp32
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.h8
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp15
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h11
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp43
-rw-r--r--lib/Target/R600/AMDILCFGStructurizer.cpp112
-rw-r--r--lib/Target/R600/AMDILInstrInfo.td10
-rw-r--r--lib/Target/R600/CMakeLists.txt3
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp82
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h2
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp4
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp6
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp21
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h1
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp2
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h4
-rw-r--r--lib/Target/R600/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp1
-rw-r--r--lib/Target/R600/Processors.td7
-rw-r--r--lib/Target/R600/R600ClauseMergePass.cpp204
-rw-r--r--lib/Target/R600/R600ControlFlowFinalizer.cpp13
-rw-r--r--lib/Target/R600/R600Defines.h4
-rw-r--r--lib/Target/R600/R600EmitClauseMarkers.cpp80
-rw-r--r--lib/Target/R600/R600ExpandSpecialInstrs.cpp17
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp609
-rw-r--r--lib/Target/R600/R600ISelLowering.h3
-rw-r--r--lib/Target/R600/R600InstrFormats.td6
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp199
-rw-r--r--lib/Target/R600/R600InstrInfo.h47
-rw-r--r--lib/Target/R600/R600Instructions.td342
-rw-r--r--lib/Target/R600/R600Intrinsics.td7
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.cpp6
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.h1
-rw-r--r--lib/Target/R600/R600MachineScheduler.cpp69
-rw-r--r--lib/Target/R600/R600MachineScheduler.h7
-rw-r--r--lib/Target/R600/R600OptimizeVectorRegisters.cpp3
-rw-r--r--lib/Target/R600/R600Packetizer.cpp52
-rw-r--r--lib/Target/R600/R600RegisterInfo.cpp44
-rw-r--r--lib/Target/R600/R600RegisterInfo.h8
-rw-r--r--lib/Target/R600/R600RegisterInfo.td45
-rw-r--r--lib/Target/R600/R600TextureIntrinsicsReplacer.cpp24
-rw-r--r--lib/Target/R600/SIDefines.h12
-rw-r--r--lib/Target/R600/SIFixSGPRCopies.cpp153
-rw-r--r--lib/Target/R600/SIISelLowering.cpp391
-rw-r--r--lib/Target/R600/SIISelLowering.h10
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp21
-rw-r--r--lib/Target/R600/SIInstrFormats.td30
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp512
-rw-r--r--lib/Target/R600/SIInstrInfo.h80
-rw-r--r--lib/Target/R600/SIInstrInfo.td174
-rw-r--r--lib/Target/R600/SIInstructions.td572
-rw-r--r--lib/Target/R600/SIIntrinsics.td24
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp17
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.cpp4
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.h1
-rw-r--r--lib/Target/R600/SIRegisterInfo.cpp59
-rw-r--r--lib/Target/R600/SIRegisterInfo.h18
-rw-r--r--lib/Target/R600/SIRegisterInfo.td16
-rw-r--r--lib/Target/R600/SITypeRewriter.cpp162
-rw-r--r--lib/Target/Sparc/CMakeLists.txt3
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp20
-rw-r--r--lib/Target/Sparc/LLVMBuild.txt1
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h22
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp13
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h4
-rw-r--r--lib/Target/Sparc/Makefile3
-rw-r--r--lib/Target/Sparc/Sparc.h19
-rw-r--r--lib/Target/Sparc/Sparc.td4
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp78
-rw-r--r--lib/Target/Sparc/SparcCallingConv.td11
-rw-r--r--lib/Target/Sparc/SparcCodeEmitter.cpp245
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp104
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h8
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp10
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp994
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h35
-rw-r--r--lib/Target/Sparc/SparcInstr64Bit.td66
-rw-r--r--lib/Target/Sparc/SparcInstrFormats.td85
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp104
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h1
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td451
-rw-r--r--lib/Target/Sparc/SparcJITInfo.cpp165
-rw-r--r--lib/Target/Sparc/SparcJITInfo.h67
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp133
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h7
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.td67
-rw-r--r--lib/Target/Sparc/SparcRelocations.h41
-rw-r--r--lib/Target/Sparc/SparcSubtarget.cpp3
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h2
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp7
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h6
-rw-r--r--lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp8
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp16
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt2
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp19
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp12
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h1
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp48
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h4
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp8
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp35
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h25
-rw-r--r--lib/Target/SystemZ/README.txt48
-rw-r--r--lib/Target/SystemZ/SystemZ.h24
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp134
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.td4
-rw-r--r--lib/Target/SystemZ/SystemZConstantPoolValue.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp296
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp1187
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h109
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td93
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td241
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp502
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h28
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td721
-rw-r--r--lib/Target/SystemZ/SystemZLongBranch.cpp23
-rw-r--r--lib/Target/SystemZ/SystemZMCInstLower.cpp102
-rw-r--r--lib/Target/SystemZ/SystemZMCInstLower.h13
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp17
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h1
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td30
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td155
-rw-r--r--lib/Target/SystemZ/SystemZPatterns.td93
-rw-r--r--lib/Target/SystemZ/SystemZProcessors.td16
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp6
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h8
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.td66
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp212
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h36
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp163
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp13
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h8
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp9
-rw-r--r--lib/Target/Target.cpp8
-rw-r--r--lib/Target/TargetLibraryInfo.cpp37
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp14
-rw-r--r--lib/Target/TargetMachine.cpp5
-rw-r--r--lib/Target/TargetMachineC.cpp54
-rw-r--r--lib/Target/TargetSubtargetInfo.cpp19
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp410
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp31
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.c244
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h32
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h69
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp16
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h16
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp16
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h20
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp401
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h3
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp9
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp6
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h3
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp30
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h6
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp24
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp177
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp2
-rw-r--r--lib/Target/X86/X86.td57
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp44
-rw-r--r--lib/Target/X86/X86AsmPrinter.h15
-rw-r--r--lib/Target/X86/X86CallingConv.h35
-rw-r--r--lib/Target/X86/X86CallingConv.td57
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp36
-rw-r--r--lib/Target/X86/X86FastISel.cpp311
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp9
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp268
-rw-r--r--lib/Target/X86/X86FrameLowering.h27
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp66
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp1800
-rw-r--r--lib/Target/X86/X86ISelLowering.h50
-rw-r--r--lib/Target/X86/X86InstrAVX512.td2815
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td84
-rw-r--r--lib/Target/X86/X86InstrCompiler.td24
-rw-r--r--lib/Target/X86/X86InstrControl.td5
-rw-r--r--lib/Target/X86/X86InstrExtension.td12
-rw-r--r--lib/Target/X86/X86InstrFMA.td77
-rw-r--r--lib/Target/X86/X86InstrFormats.td39
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td78
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp397
-rw-r--r--lib/Target/X86/X86InstrInfo.h4
-rw-r--r--lib/Target/X86/X86InstrInfo.td344
-rw-r--r--lib/Target/X86/X86InstrMMX.td64
-rw-r--r--lib/Target/X86/X86InstrSSE.td1554
-rw-r--r--lib/Target/X86/X86InstrXOP.td146
-rw-r--r--lib/Target/X86/X86JITInfo.cpp2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp184
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp19
-rw-r--r--lib/Target/X86/X86RegisterInfo.h4
-rw-r--r--lib/Target/X86/X86SchedHaswell.td4
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td4
-rw-r--r--lib/Target/X86/X86Schedule.td48
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td24
-rw-r--r--lib/Target/X86/X86ScheduleSLM.td668
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp2
-rw-r--r--lib/Target/X86/X86Subtarget.cpp59
-rw-r--r--lib/Target/X86/X86Subtarget.h32
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp6
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp4
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp149
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp34
-rw-r--r--lib/Target/XCore/CMakeLists.txt1
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp8
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h4
-rw-r--r--lib/Target/XCore/XCore.h2
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp15
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp44
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp42
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h10
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp6
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h1
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td9
-rw-r--r--lib/Target/XCore/XCoreLowerThreadLocal.cpp114
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp8
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h2
-rw-r--r--lib/Target/XCore/XCoreTargetTransformInfo.cpp83
-rw-r--r--lib/Transforms/Hello/Hello.cpp2
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp10
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp11
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp13
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp54
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp4
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp3
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp270
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp2
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp4
-rw-r--r--lib/Transforms/IPO/Internalize.cpp118
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp77
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp92
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp2
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp226
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h13
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp184
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp34
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp156
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp119
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp30
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp51
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp26
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp38
-rw-r--r--lib/Transforms/InstCombine/InstCombineWorklist.h7
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp112
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp240
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp6
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt5
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp1397
-rw-r--r--lib/Transforms/Instrumentation/DebugIR.cpp3
-rw-r--r--lib/Transforms/Instrumentation/EdgeProfiling.cpp117
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp25
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp4
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp443
-rw-r--r--lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp225
-rw-r--r--lib/Transforms/Instrumentation/PathProfiling.cpp1424
-rw-r--r--lib/Transforms/Instrumentation/ProfilingUtils.cpp169
-rw-r--r--lib/Transforms/Instrumentation/ProfilingUtils.h36
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp12
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp242
-rw-r--r--lib/Transforms/Scalar/BasicBlockPlacement.cpp152
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt4
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp22
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp10
-rw-r--r--lib/Transforms/Scalar/GVN.cpp197
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp20
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp50
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp1184
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp21
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp42
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp127
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp13
-rw-r--r--lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp (renamed from lib/Target/Mips/MipsOptimizeMathLibCalls.cpp)83
-rw-r--r--lib/Transforms/Scalar/SROA.cpp155
-rw-r--r--lib/Transforms/Scalar/SampleProfile.cpp479
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp12
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp4
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp165
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp51
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp9
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp9
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt1
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp3
-rw-r--r--lib/Transforms/Utils/FlattenCFG.cpp6
-rw-r--r--lib/Transforms/Utils/GlobalStatus.cpp183
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp3
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp15
-rw-r--r--lib/Transforms/Utils/Local.cpp208
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp8
-rw-r--r--lib/Transforms/Utils/LowerExpectIntrinsic.cpp2
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp1
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp62
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp7
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp298
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp6
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp134
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp271
-rw-r--r--lib/Transforms/Utils/SpecialCaseList.cpp97
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp78
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp1119
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp1097
902 files changed, 86330 insertions, 36243 deletions
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 5910526..2289c12 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -299,7 +299,6 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
bool AliasSetTracker::add(LoadInst *LI) {
if (LI->getOrdering() > Monotonic) return addUnknown(LI);
AliasSet::AccessType ATy = AliasSet::Refs;
- if (!LI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
AA.getTypeStoreSize(LI->getType()),
@@ -312,7 +311,6 @@ bool AliasSetTracker::add(LoadInst *LI) {
bool AliasSetTracker::add(StoreInst *SI) {
if (SI->getOrdering() > Monotonic) return addUnknown(SI);
AliasSet::AccessType ATy = AliasSet::Mods;
- if (!SI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 349c417..98f2a55 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
initializeDependenceAnalysisPass(Registry);
+ initializeDelinearizationPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -54,16 +55,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializePostDominatorTreePass(Registry);
- initializeProfileEstimatorPassPass(Registry);
- initializeNoProfileInfoPass(Registry);
- initializeNoPathProfileInfoPass(Registry);
- initializeProfileInfoAnalysisGroup(Registry);
- initializePathProfileInfoAnalysisGroup(Registry);
- initializeLoaderPassPass(Registry);
- initializePathProfileLoaderPassPass(Registry);
- initializeProfileVerifierPassPass(Registry);
- initializePathProfileVerifierPass(Registry);
- initializeProfileMetadataLoaderPassPass(Registry);
initializeRegionInfoPass(Registry);
initializeRegionViewerPass(Registry);
initializeRegionPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 9fe1362..b2c2011 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -122,7 +122,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// question (in this case rewind to p), or
// - just give up. It is up to caller to make sure the pointer is pointing
// to the base address the object.
- //
+ //
// We go for 2nd option for simplicity.
if (!isIdentifiedObject(V))
return false;
@@ -130,7 +130,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
-
+
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
}
@@ -163,7 +163,7 @@ namespace {
EK_SignExt,
EK_ZeroExt
};
-
+
struct VariableGEPIndex {
const Value *V;
ExtensionKind Extension;
@@ -200,7 +200,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Offset = 0;
return V;
}
-
+
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
@@ -231,7 +231,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
}
}
}
-
+
// Since GEP indices are sign extended anyway, we don't care about the high
// bits of a sign or zero extended value - just scales and offsets. The
// extensions have to be consistent though.
@@ -248,10 +248,10 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
TD, Depth+1);
Scale = Scale.zext(OldWidth);
Offset = Offset.zext(OldWidth);
-
+
return Result;
}
-
+
Scale = 1;
Offset = 0;
return V;
@@ -276,7 +276,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
const DataLayout *TD) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = 6;
-
+
BaseOffs = 0;
do {
// See if this is a bitcast or GEP.
@@ -291,7 +291,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
}
return V;
}
-
+
if (Op->getOpcode() == Instruction::BitCast) {
V = Op->getOperand(0);
continue;
@@ -308,15 +308,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = Simplified;
continue;
}
-
+
return V;
}
-
+
// Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
- ->getElementType()->isSized())
+ if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
return V;
-
+
// If we are lacking DataLayout information, we can't compute the offets of
// elements computed by GEPs. However, we can handle bitcast equivalent
// GEPs.
@@ -326,7 +325,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = GEPOp->getOperand(0);
continue;
}
-
+
+ unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -337,38 +337,37 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
-
+
BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
-
+
// For an array/pointer, add the element offset, explicitly scaled.
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero()) continue;
BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
continue;
}
-
+
uint64_t Scale = TD->getTypeAllocSize(*GTI);
ExtensionKind Extension = EK_NotExtended;
-
+
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
- unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
- if (TD->getPointerSizeInBits() > Width)
+ unsigned Width = Index->getType()->getIntegerBitWidth();
+ if (TD->getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
-
+
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
*TD, 0);
-
+
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
BaseOffs += IndexOffset.getSExtValue()*Scale;
Scale *= IndexScale.getSExtValue();
-
-
+
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
// A[x][x] -> x*16 + x*4 -> x*20
@@ -381,25 +380,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
break;
}
}
-
+
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
-
+
if (Scale) {
VariableGEPIndex Entry = {Index, Extension,
static_cast<int64_t>(Scale)};
VarIndices.push_back(Entry);
}
}
-
+
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
} while (--MaxLookup);
-
+
// If the chain of expressions is too deep, just return early.
return V;
}
@@ -407,7 +406,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
/// GetIndexDifference - Dest and Src are the variable indices from two
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
+/// difference between the two pointers.
static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const SmallVectorImpl<VariableGEPIndex> &Src) {
if (Src.empty()) return;
@@ -416,12 +415,12 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const Value *V = Src[i].V;
ExtensionKind Extension = Src[i].Extension;
int64_t Scale = Src[i].Scale;
-
+
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
+
// If we found it, subtract off Scale V's from the entry in Dest. If it
// goes to zero, remove the entry.
if (Dest[j].Scale != Scale)
@@ -431,7 +430,7 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
Scale = 0;
break;
}
-
+
// If we didn't consume this entry, add it to the end of the Dest list.
if (Scale) {
VariableGEPIndex Entry = { V, Extension, -Scale };
@@ -526,7 +525,7 @@ namespace {
return (AliasAnalysis*)this;
return this;
}
-
+
private:
// AliasCache - Track alias queries to guard against recursion.
typedef std::pair<Location, Location> LocPair;
@@ -696,7 +695,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
"AliasAnalysis query involving multiple functions!");
const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
-
+
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
// We cannot exclude byval arguments here; these belong to the caller of
@@ -706,7 +705,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
return NoModRef;
-
+
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
@@ -722,7 +721,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (!(*CI)->getType()->isPointerTy() ||
(!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
continue;
-
+
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
@@ -732,7 +731,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
break;
}
}
-
+
if (!PassedAsArg)
return NoModRef;
}
@@ -821,7 +820,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
}
// We can bound the aliasing properties of memset_pattern16 just as we can
- // for memcpy/memset. This is particularly important because the
+ // for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
else if (TLI.has(LibFunc::memset_pattern16) &&
@@ -925,22 +924,22 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
GEP1VariableIndices.clear();
}
}
-
+
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
if (BaseAlias != MustAlias) return BaseAlias;
-
+
// Otherwise, we have a MustAlias. Since the base pointers alias each other
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
int64_t GEP2BaseOffset;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
-
+
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -948,12 +947,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
-
+
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
GEP1BaseOffset -= GEP2BaseOffset;
GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
-
+
} else {
// Check to see if these two pointers are related by the getelementptr
// instruction. If one pointer is a GEP with a non-zero index of the other
@@ -975,7 +974,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1) {
@@ -984,7 +983,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return MayAlias;
}
}
-
+
// In the two GEP Case, if there is no difference in the offsets of the
// computed pointers, the resultant pointers are a must alias. This
  // happens when we have two lexically identical GEP's (for example).
@@ -1226,7 +1225,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
(isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
return NoAlias;
-
+
// If one pointer is the result of a call/invoke or load and the other is a
// non-escaping local object within the same function, then we know the
// object couldn't escape to a point where the call could return it.
@@ -1248,7 +1247,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
(V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
return NoAlias;
-
+
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
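For illustration, a minimal sketch (plain C++, not part of this patch; the names are invented) of the situation the aliasGEP logic above reasons about: once the two base pointers MustAlias, the decomposed offsets decide the answer.

void gepAliasExample(int *A, long i) {
  int *P = &A[i];      // GEP1: base A, variable index i, scaled by the element size
  int *Q = &A[i];      // GEP2: identical decomposition -> zero offset difference
  int *R = &A[i + 1];  // same base, offsets differ by one element
  *P = 1;
  *Q = 2;              // zero difference => MustAlias with *P
  *R = 3;              // difference >= the access size => NoAlias with *P
}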
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 8469556..62f3ab1 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======//
+//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,14 +17,97 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
using namespace llvm;
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
- true, true)
+#ifndef NDEBUG
+enum GVDAGType {
+ GVDT_None,
+ GVDT_Fraction,
+ GVDT_Integer
+};
+
+static cl::opt<GVDAGType>
+ViewBlockFreqPropagationDAG("view-block-freq-propagation-dags", cl::Hidden,
+ cl::desc("Pop up a window to show a dag displaying how block "
+ "frequencies propagation through the CFG."),
+ cl::values(
+ clEnumValN(GVDT_None, "none",
+ "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValEnd));
+
+namespace llvm {
+
+template <>
+struct GraphTraits<BlockFrequencyInfo *> {
+ typedef const BasicBlock NodeType;
+ typedef succ_const_iterator ChildIteratorType;
+ typedef Function::const_iterator nodes_iterator;
+
+ static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static ChildIteratorType child_begin(const NodeType *N) {
+ return succ_begin(N);
+ }
+ static ChildIteratorType child_end(const NodeType *N) {
+ return succ_end(N);
+ }
+ static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
+ return G->getFunction()->end();
+ }
+};
+
+template<>
+struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const BlockFrequencyInfo *G) {
+ return G->getFunction()->getName();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node,
+ const BlockFrequencyInfo *Graph) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << Node->getName().str() << ":";
+ switch (ViewBlockFreqPropagationDAG) {
+ case GVDT_Fraction:
+ Graph->getBlockFreq(Node).print(OS);
+ break;
+ case GVDT_Integer:
+ OS << Graph->getBlockFreq(Node).getFrequency();
+ break;
+ case GVDT_None:
+ llvm_unreachable("If we are not supposed to render a graph we should "
+ "never reach this point.");
+ }
+
+ return Result;
+ }
+};
+
+} // end namespace llvm
+#endif
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
+ "Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
- true, true)
+INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
+ "Block Frequency Analysis", true, true)
char BlockFrequencyInfo::ID = 0;
@@ -46,6 +129,10 @@ void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
bool BlockFrequencyInfo::runOnFunction(Function &F) {
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
BFI->doFunction(&F, &BPI);
+#ifndef NDEBUG
+ if (ViewBlockFreqPropagationDAG != GVDT_None)
+ view();
+#endif
return false;
}
@@ -56,3 +143,19 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
return BFI->getBlockFreq(BB);
}
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void BlockFrequencyInfo::view() const {
+// This code is only for debugging.
+#ifndef NDEBUG
+ ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+#else
+ errs() << "BlockFrequencyInfo::view is only available in debug builds on "
+ "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+const Function *BlockFrequencyInfo::getFunction() const {
+ return BFI->Fn;
+}
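As a usage sketch (not part of the change; the helper name is invented and the usual header location is assumed), a client can consume the frequencies through the same getBlockFreq()/getFrequency() interface exercised above:

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Count the blocks the analysis considers hotter than the function entry.
static unsigned countHotBlocks(const Function &F,
                               const BlockFrequencyInfo &BFI) {
  uint64_t EntryFreq = BFI.getBlockFreq(&F.getEntryBlock()).getFrequency();
  unsigned Hot = 0;
  for (Function::const_iterator I = F.begin(), E = F.end(); I != E; ++I)
    if (BFI.getBlockFreq(&*I).getFrequency() > EntryFreq)
      ++Hot;
  return Hot;
}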
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 7cdf828..86560ca 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -398,10 +398,24 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
// InstCombine canonicalizes X <= 0 into X < 1.
// X <= 0 -> Unlikely
isProb = false;
- } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) {
- // InstCombine canonicalizes X >= 0 into X > -1.
- // X >= 0 -> Likely
- isProb = true;
+ } else if (CV->isAllOnesValue()) {
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ // X == -1 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ // X != -1 -> Likely
+ isProb = true;
+ break;
+ case CmpInst::ICMP_SGT:
+ // InstCombine canonicalizes X >= 0 into X > -1.
+ // X >= 0 -> Likely
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
} else {
return false;
}
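Illustrative only (not from the patch): the sentinel-check idiom the new ICMP_EQ/ICMP_NE cases target. Comparisons against -1 are typically error checks, so the error path is treated as cold.

#include <unistd.h>

long countBytes(int fd) {
  char buf[4096];
  long total = 0;
  ssize_t n;
  while ((n = read(fd, buf, sizeof buf)) != -1 && n != 0)  // X != -1 -> Likely
    total += n;
  if (n == -1)                                             // X == -1 -> Unlikely
    return -1;                                             // cold error path
  return total;
}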
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index a5ed21a..c3f32d3 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -116,7 +116,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
// LoopInfo contains a mapping from basic block to the innermost loop. Find
// the outermost loop in the loop nest that contains BB.
-static const Loop *getOutermostLoop(LoopInfo *LI, const BasicBlock *BB) {
+static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
const Loop *L = LI->getLoopFor(BB);
if (L) {
while (const Loop *Parent = L->getParentLoop())
@@ -126,60 +126,17 @@ static const Loop *getOutermostLoop(LoopInfo *LI, const BasicBlock *BB) {
}
// True if there is a loop which contains both BB1 and BB2.
-static bool loopContainsBoth(LoopInfo *LI,
+static bool loopContainsBoth(const LoopInfo *LI,
const BasicBlock *BB1, const BasicBlock *BB2) {
const Loop *L1 = getOutermostLoop(LI, BB1);
const Loop *L2 = getOutermostLoop(LI, BB2);
return L1 != NULL && L1 == L2;
}
-static bool isPotentiallyReachableSameBlock(const Instruction *A,
- const Instruction *B,
- LoopInfo *LI) {
- // The same block case is special because it's the only time we're looking
- // within a single block to see which comes first. Once we start looking at
- // multiple blocks, the first instruction of the block is reachable, so we
- // only need to determine reachability between whole blocks.
-
- const BasicBlock *BB = A->getParent();
- // If the block is in a loop then we can reach any instruction in the block
- // from any other instruction in the block by going around the backedge.
- // Check whether we're in a loop (or aren't sure).
-
- // Can't be in a loop if it's the entry block -- the entry block may not
- // have predecessors.
- bool HasLoop = BB != &BB->getParent()->getEntryBlock();
-
- // Can't be in a loop if LoopInfo doesn't know about it.
- if (LI && HasLoop) {
- HasLoop = LI->getLoopFor(BB) != 0;
- }
- if (HasLoop)
- return true;
-
- // Linear scan, start at 'A', see whether we hit 'B' or the end first.
- for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
- if (&*I == B)
- return true;
- }
- return false;
-}
-
-bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
- DominatorTree *DT, LoopInfo *LI) {
- assert(A->getParent()->getParent() == B->getParent()->getParent() &&
- "This analysis is function-local!");
-
- const BasicBlock *StopBB = B->getParent();
-
- if (A->getParent() == B->getParent())
- return isPotentiallyReachableSameBlock(A, B, LI);
-
- if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return true;
- if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return false;
-
+static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
+ BasicBlock *StopBB,
+ const DominatorTree *DT,
+ const LoopInfo *LI) {
// When the stop block is unreachable, it's dominated from everywhere,
// regardless of whether there's a path between the two blocks.
if (DT && !DT->isReachableFromEntry(StopBB))
@@ -188,11 +145,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
// Limit the number of blocks we visit. The goal is to avoid run-away compile
// times on large CFGs without hampering sensible code. Arbitrarily chosen.
unsigned Limit = 32;
-
SmallSet<const BasicBlock*, 64> Visited;
- SmallVector<BasicBlock*, 32> Worklist;
- Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
-
do {
BasicBlock *BB = Worklist.pop_back_val();
if (!Visited.insert(BB))
@@ -221,7 +174,72 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
}
} while (!Worklist.empty());
- // We have exhaustived all possible paths and are certain that 'To' can not
- // be reached from 'From'.
+ // We have exhausted all possible paths and are certain that 'To' can not be
+ // reached from 'From'.
return false;
}
+
+bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent() == B->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(const_cast<BasicBlock*>(A));
+
+ return isPotentiallyReachableInner(Worklist, const_cast<BasicBlock*>(B),
+ DT, LI);
+}
+
+bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent()->getParent() == B->getParent()->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+
+ if (A->getParent() == B->getParent()) {
+ // The same block case is special because it's the only time we're looking
+ // within a single block to see which instruction comes first. Once we
+ // start looking at multiple blocks, the first instruction of the block is
+ // reachable, so we only need to determine reachability between whole
+ // blocks.
+ BasicBlock *BB = const_cast<BasicBlock *>(A->getParent());
+
+ // If the block is in a loop then we can reach any instruction in the block
+ // from any other instruction in the block by going around a backedge.
+ if (LI && LI->getLoopFor(BB) != 0)
+ return true;
+
+ // Linear scan, start at 'A', see whether we hit 'B' or the end first.
+ for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
+ if (&*I == B)
+ return true;
+ }
+
+ // Can't be in a loop if it's the entry block -- the entry block may not
+ // have predecessors.
+ if (BB == &BB->getParent()->getEntryBlock())
+ return false;
+
+ // Otherwise, continue doing the normal per-BB CFG walk.
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ Worklist.push_back(*I);
+
+ if (Worklist.empty()) {
+ // We've proven that there's no path!
+ return false;
+ }
+ } else {
+ Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
+ }
+
+ if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return true;
+ if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return false;
+
+ return isPotentiallyReachableInner(Worklist,
+ const_cast<BasicBlock*>(B->getParent()),
+ DT, LI);
+}
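A minimal sketch of how the two overloads above might be called (the wrapper names are invented, and llvm/Analysis/CFG.h is assumed to be the header declaring them):

#include "llvm/Analysis/CFG.h"
using namespace llvm;

static bool instrMayReach(const Instruction *From, const Instruction *To,
                          const DominatorTree *DT, const LoopInfo *LI) {
  // Same-block queries are resolved by the linear scan added above; cross-block
  // queries fall through to the bounded walk in isPotentiallyReachableInner.
  return isPotentiallyReachable(From, To, DT, LI);
}

static bool blockMayReach(const BasicBlock *From, const BasicBlock *To) {
  // The new block-level overload; without DominatorTree and LoopInfo the
  // answer is simply more conservative.
  return isPotentiallyReachable(From, To, 0, 0);
}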
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 94ded34..3624aac 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMAnalysis
CostModel.cpp
CodeMetrics.cpp
ConstantFolding.cpp
+ Delinearization.cpp
DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
@@ -35,17 +36,7 @@ add_llvm_library(LLVMAnalysis
ModuleDebugInfoPrinter.cpp
NoAliasAnalysis.cpp
PHITransAddr.cpp
- PathNumbering.cpp
- PathProfileInfo.cpp
- PathProfileVerifier.cpp
PostDominators.cpp
- ProfileEstimatorPass.cpp
- ProfileInfo.cpp
- ProfileInfoLoader.cpp
- ProfileInfoLoaderPass.cpp
- ProfileVerifierPass.cpp
- ProfileDataLoader.cpp
- ProfileDataLoaderPass.cpp
PtrUseVisitor.cpp
RegionInfo.cpp
RegionPass.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 9eb76a8..79fab1b 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -146,8 +146,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
case Instruction::PHI:
case Instruction::Select:
// The original value is not captured via this if the new value isn't.
+ Count = 0;
for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return Tracker->tooManyUses();
+
Use *U = &UI.getUse();
if (Visited.insert(U))
if (Tracker->shouldExplore(U))
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index bc0dffc..3d32232 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -224,7 +224,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt &Offset, const DataLayout &TD) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- Offset.clearAllBits();
+ unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType());
+ Offset = APInt(BitWidth, 0);
return true;
}
@@ -238,16 +239,23 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
- // If the base isn't a global+constant, we aren't either.
- if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
- return false;
+ GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
+ if (!GEP)
+ return false;
- // Otherwise, add any offset that our operands provide.
- return GEP->accumulateConstantOffset(TD, Offset);
- }
+ unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType());
+ APInt TmpOffset(BitWidth, 0);
- return false;
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ if (!GEP->accumulateConstantOffset(TD, TmpOffset))
+ return false;
+
+ Offset = TmpOffset;
+ return true;
}
/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
@@ -324,12 +332,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
// If we read all of the bytes we needed from this element we're done.
uint64_t NextEltOffset = SL->getElementOffset(Index);
- if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset)
+ if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
return true;
// Move to the next element of the struct.
- CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
- BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
+ CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
+ BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
ByteOffset = 0;
CurEltOffset = NextEltOffset;
}
@@ -338,7 +346,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
- Type *EltTy = cast<SequentialType>(C->getType())->getElementType();
+ Type *EltTy = C->getType()->getSequentialElementType();
uint64_t EltSize = TD.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
@@ -346,7 +354,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ArrayType *AT = dyn_cast<ArrayType>(C->getType()))
NumElts = AT->getNumElements();
else
- NumElts = cast<VectorType>(C->getType())->getNumElements();
+ NumElts = C->getType()->getVectorNumElements();
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
@@ -367,9 +375,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
- CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) {
return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
BytesLeft, TD);
+ }
}
// Otherwise, unknown initializer type.
@@ -378,26 +387,29 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
const DataLayout &TD) {
- Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ PointerType *PTy = cast<PointerType>(C->getType());
+ Type *LoadTy = PTy->getElementType();
IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
+ unsigned AS = PTy->getAddressSpace();
+
// If this is a float/double load, we can try folding it as an int32/64 load
// and then bitcast the result. This can be useful for union cases. Note
// that address spaces don't matter here since we're not going to result in
// an actual new load.
Type *MapTy;
if (LoadTy->isHalfTy())
- MapTy = Type::getInt16PtrTy(C->getContext());
+ MapTy = Type::getInt16PtrTy(C->getContext(), AS);
else if (LoadTy->isFloatTy())
- MapTy = Type::getInt32PtrTy(C->getContext());
+ MapTy = Type::getInt32PtrTy(C->getContext(), AS);
else if (LoadTy->isDoubleTy())
- MapTy = Type::getInt64PtrTy(C->getContext());
+ MapTy = Type::getInt64PtrTy(C->getContext(), AS);
else if (LoadTy->isVectorTy()) {
- MapTy = IntegerType::get(C->getContext(),
- TD.getTypeAllocSizeInBits(LoadTy));
- MapTy = PointerType::getUnqual(MapTy);
+ MapTy = PointerType::getIntNPtrTy(C->getContext(),
+ TD.getTypeAllocSizeInBits(LoadTy),
+ AS);
} else
return 0;
@@ -408,10 +420,11 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
}
unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
- if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
+ if (BytesLoaded > 32 || BytesLoaded == 0)
+ return 0;
GlobalValue *GVal;
- APInt Offset(TD.getPointerSizeInBits(), 0);
+ APInt Offset;
if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
return 0;
@@ -422,7 +435,8 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're loading off the beginning of the global, some bytes may be valid,
// but we don't try to handle this.
- if (Offset.isNegative()) return 0;
+ if (Offset.isNegative())
+ return 0;
// If we're not accessing anything in this constant, the result is undefined.
if (Offset.getZExtValue() >=
@@ -439,7 +453,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
ResultVal = RawBytes[BytesLoaded - 1];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
- ResultVal |= RawBytes[BytesLoaded-1-i];
+ ResultVal |= RawBytes[BytesLoaded - 1 - i];
}
} else {
ResultVal = RawBytes[0];
@@ -464,14 +478,17 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If the loaded value isn't a constant expr, we can't handle it.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
- if (!CE) return 0;
+ if (!CE)
+ return 0;
if (CE->getOpcode() == Instruction::GetElementPtr) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (Constant *V =
ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
return V;
+ }
+ }
}
// Instead of loading constant c string, use corresponding integer value
@@ -576,13 +593,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
// constant. This happens frequently when iterating over a global array.
if (Opc == Instruction::Sub && DL) {
GlobalValue *GV1, *GV2;
- unsigned PtrSize = DL->getPointerSizeInBits();
- unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
- APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0);
+ APInt Offs1, Offs2;
if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
GV1 == GV2) {
+ unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
// PtrToInt may change the bitwidth so we have convert to the right size
// first.
@@ -600,15 +617,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
- if (!TD) return 0;
- Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+ if (!TD)
+ return 0;
+
+ Type *IntPtrTy = TD->getIntPtrType(ResultTy);
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
- !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
- Ops.slice(1, i-1)))) &&
+ !isa<StructType>(GetElementPtrInst::getIndexedType(
+ Ops[0]->getType(),
+ Ops.slice(1, i - 1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -619,13 +639,16 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
} else
NewIdxs.push_back(Ops[i]);
}
- if (!Any) return 0;
- Constant *C =
- ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (!Any)
+ return 0;
+
+ Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
+ }
+
return C;
}
@@ -640,7 +663,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
NewPtrTy = NewPtrTy->getElementType()->getPointerTo(
OldPtrTy->getAddressSpace());
- Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy);
+ Ptr = ConstantExpr::getPointerCast(Ptr, NewPtrTy);
}
return Ptr;
}
@@ -651,11 +674,12 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
return 0;
- Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
+ Type *ResultElementTy = ResultTy->getPointerElementType();
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
@@ -664,8 +688,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If this is "gep i8* Ptr, (sub 0, V)", fold this as:
// "inttoptr (sub (ptrtoint Ptr), V)"
- if (Ops.size() == 2 &&
- cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
assert((CE == 0 || CE->getType() == IntPtrTy) &&
"CastGEPIndices didn't canonicalize index types!");
@@ -692,7 +715,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If this is a GEP of a GEP, fold it all into a single GEP.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
  // Do not try to incorporate the sub-GEP if some index is not a number.
bool AllConstantInt = true;
@@ -713,12 +736,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If the base value for this address is a literal integer value, fold the
// getelementptr to the resulting integer value casted to the pointer type.
APInt BasePtr(BitWidth, 0);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ }
+ }
+
if (Ptr->isNullValue() || BasePtr != 0) {
- Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
return ConstantExpr::getIntToPtr(C, ResultTy);
}
@@ -728,7 +754,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Also, this helps GlobalOpt do SROA on GlobalVariables.
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
- SmallVector<Constant*, 32> NewIdxs;
+ SmallVector<Constant *, 32> NewIdxs;
+
do {
if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
if (ATy->isPointerTy()) {
@@ -743,7 +770,6 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Determine which element of the array the offset points into.
APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
- IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -778,7 +804,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// We've reached some non-indexable type.
break;
}
- } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+ } while (Ty != ResultElementTy);
// If we haven't used up the entire offset by descending the static
// type, then the offset is pointing into the middle of an indivisible
@@ -787,14 +813,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return 0;
// Create a GEP.
- Constant *C =
- ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
- assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
- if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ if (Ty != ResultElementTy)
C = FoldBitCast(C, ResultTy, *TD);
return C;
@@ -867,16 +892,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
- if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
return ConstantExpr::getInsertValue(
cast<Constant>(IVI->getAggregateOperand()),
cast<Constant>(IVI->getInsertedValueOperand()),
IVI->getIndices());
+ }
- if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) {
return ConstantExpr::getExtractValue(
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
+ }
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
@@ -930,9 +957,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
- if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
return C;
+ }
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
}
@@ -953,10 +981,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
if (TD && CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- if (TD->getPointerSizeInBits() < InWidth) {
+ unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+ if (PtrWidth < InWidth) {
Constant *Mask =
- ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
- TD->getPointerSizeInBits()));
+ ConstantInt::get(CE->getContext(),
+ APInt::getLowBitsSet(InWidth, PtrWidth));
Input = ConstantExpr::getAnd(Input, Mask);
}
// Do a zext or trunc to get to the dest size.
@@ -966,13 +995,22 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::IntToPtr:
// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
- // the int size is >= the ptr size. This requires knowing the width of a
- // pointer, so it can't be done in ConstantExpr::getCast.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
- if (TD &&
- TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
- CE->getOpcode() == Instruction::PtrToInt)
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ // the int size is >= the ptr size and the address spaces are the same.
+ // This requires knowing the width of a pointer, so it can't be done in
+ // ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+ Constant *SrcPtr = CE->getOperand(0);
+ unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+ unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
+
+ if (MidIntSize >= SrcPtrSize) {
+ unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
+ if (SrcAS == DestTy->getPointerAddressSpace())
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ }
+ }
+ }
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::Trunc:
@@ -984,6 +1022,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::SIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI:
+ case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
if (TD)
@@ -1024,8 +1063,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
if (TD && Ops1->isNullValue()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1036,19 +1075,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if (CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy) {
- Constant *C = CE0->getOperand(0);
- Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ }
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
-
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1060,11 +1101,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if ((CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy &&
- CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
- return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
+ return ConstantFoldCompareInstOperands(Predicate,
+ CE0->getOperand(0),
+ CE1->getOperand(0),
+ TD,
+ TLI);
+ }
+ }
}
}
@@ -1101,7 +1148,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
// addressing.
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
C = C->getAggregateElement(CE->getOperand(i));
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
}
return C;
}
@@ -1116,7 +1164,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// addressing.
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
C = C->getAggregateElement(Indices[i]);
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
}
return C;
}
@@ -1128,8 +1177,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
/// canConstantFoldCallTo - Return true if its even possible to fold a call to
/// the specified function.
-bool
-llvm::canConstantFoldCallTo(const Function *F) {
+bool llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
case Intrinsic::log:
@@ -1167,7 +1215,8 @@ llvm::canConstantFoldCallTo(const Function *F) {
case 0: break;
}
- if (!F->hasName()) return false;
+ if (!F->hasName())
+ return false;
StringRef Name = F->getName();
// In these cases, the check of the length is required. We don't want to
@@ -1250,7 +1299,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
static Constant *ConstantFoldConvertToInt(const APFloat &Val,
bool roundTowardZero, Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64bits.
- unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
"Can only constant fold conversions to 64 and 32 bit ints");
@@ -1271,7 +1320,8 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val,
Constant *
llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
- if (!F->hasName()) return 0;
+ if (!F->hasName())
+ return 0;
StringRef Name = F->getName();
Type *Ty = F->getReturnType();
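The recurring theme of the ConstantFolding changes above is deriving pointer widths from the pointer type instead of a single module-wide pointer size. A small sketch (using only DataLayout calls that already appear in the hunks; the helper name is invented):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

// Size an offset accumulator for a specific pointer type, so pointers in
// address spaces with different widths get correctly sized APInts.
static APInt makeOffsetFor(const DataLayout &TD, Type *PtrTy) {
  unsigned BitWidth = TD.getPointerTypeSizeInBits(PtrTy); // per address space
  return APInt(BitWidth, 0);
}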
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 927508e..f943258 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -19,6 +19,7 @@
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
@@ -26,10 +27,15 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
+ cl::Hidden,
+ cl::desc("Recognize reduction patterns."));
+
namespace {
class CostModelAnalysis : public FunctionPass {
@@ -105,6 +111,260 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
return OpInfo;
}
+static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) {
+ if (M1.size() != M2.size())
+ return false;
+
+ for (unsigned i = 0, e = M1.size(); i != e; ++i)
+ if (M1[i] != M2[i])
+ return false;
+
+ return true;
+}
+
+static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
+ unsigned Level) {
+ // We don't need a shuffle if we just want to have element 0 in position 0 of
+ // the vector.
+ if (!SI && Level == 0 && IsLeft)
+ return true;
+ else if (!SI)
+ return false;
+
+ SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
+
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
+ // we look at the left or right side.
+ for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
+ Mask[i] = val;
+
+ SmallVector<int, 16> ActualMask = SI->getShuffleMask();
+ if (!matchMask(Mask, ActualMask))
+ return false;
+
+ return true;
+}
+
+static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
+ unsigned Level, unsigned NumLevels) {
+ // Match one level of pairwise operations.
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ if (BinOp == 0)
+ return false;
+
+ assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
+
+ unsigned Opcode = BinOp->getOpcode();
+ Value *L = BinOp->getOperand(0);
+ Value *R = BinOp->getOperand(1);
+
+ ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
+ if (!LS && Level)
+ return false;
+ ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
+ if (!RS && Level)
+ return false;
+
+ // On level 0 we can omit one shufflevector instruction.
+ if (!Level && !RS && !LS)
+ return false;
+
+ // Shuffle inputs must match.
+ Value *NextLevelOpL = LS ? LS->getOperand(0) : 0;
+ Value *NextLevelOpR = RS ? RS->getOperand(0) : 0;
+ Value *NextLevelOp = 0;
+ if (NextLevelOpR && NextLevelOpL) {
+ // If we have two shuffles their operands must match.
+ if (NextLevelOpL != NextLevelOpR)
+ return false;
+
+ NextLevelOp = NextLevelOpL;
+ } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
+ // On the first level we can omit the shufflevector <0, undef,...>. So the
+ // input to the other shufflevector <1, undef> must match with one of the
+ // inputs to the current binary operation.
+ // Example:
+ // %NextLevelOpL = shufflevector %R, <1, undef ...>
+ // %BinOp = fadd %NextLevelOpL, %R
+ if (NextLevelOpL && NextLevelOpL != R)
+ return false;
+ else if (NextLevelOpR && NextLevelOpR != L)
+ return false;
+
+ NextLevelOp = NextLevelOpL ? R : L;
+ } else
+ return false;
+
+ // Check that the next level's binary operation exists and matches the
+ // current one.
+ BinaryOperator *NextLevelBinOp = 0;
+ if (Level + 1 != NumLevels) {
+ if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
+ return false;
+ else if (NextLevelBinOp->getOpcode() != Opcode)
+ return false;
+ }
+
+ // Shuffle mask for pairwise operation must match.
+ if (matchPairwiseShuffleMask(LS, true, Level)) {
+ if (!matchPairwiseShuffleMask(RS, false, Level))
+ return false;
+ } else if (matchPairwiseShuffleMask(RS, true, Level)) {
+ if (!matchPairwiseShuffleMask(LS, false, Level))
+ return false;
+ } else
+ return false;
+
+ if (++Level == NumLevels)
+ return true;
+
+ // Match next level.
+ return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
+}
+
+static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+ // We look for a sequence of shuffle,shuffle,add triples like the following
+ // that builds a pairwise reduction tree.
+ //
+ // (X0, X1, X2, X3)
+ // (X0 + X1, X2 + X3, undef, undef)
+ // ((X0 + X1) + (X2 + X3), undef, undef, undef)
+ //
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+ if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)))
+ return false;
+
+ Opcode = RdxStart->getOpcode();
+ Ty = VecTy;
+
+ return true;
+}
+
+static std::pair<Value *, ShuffleVectorInst *>
+getShuffleAndOtherOprd(BinaryOperator *B) {
+
+ Value *L = B->getOperand(0);
+ Value *R = B->getOperand(1);
+ ShuffleVectorInst *S = 0;
+
+ if ((S = dyn_cast<ShuffleVectorInst>(L)))
+ return std::make_pair(R, S);
+
+ S = dyn_cast<ShuffleVectorInst>(R);
+ return std::make_pair(L, S);
+}
+
+static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+ unsigned RdxOpcode = RdxStart->getOpcode();
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+ // We look for a sequence of shuffles and adds like the following, matching
+ // one fadd/shuffle-vector pair at a time.
+ //
+ // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+ // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+
+ unsigned MaskStart = 1;
+ Value *RdxOp = RdxStart;
+ SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+ unsigned NumVecElemsRemain = NumVecElems;
+ while (NumVecElemsRemain - 1) {
+ // Check for the right reduction operation.
+ BinaryOperator *BinOp;
+ if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
+ return false;
+ if (BinOp->getOpcode() != RdxOpcode)
+ return false;
+
+ Value *NextRdxOp;
+ ShuffleVectorInst *Shuffle;
+ tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
+
+ // Check that the current reduction operation and the shuffle use the same value.
+ if (Shuffle == 0)
+ return false;
+ if (Shuffle->getOperand(0) != NextRdxOp)
+ return false;
+
+ // Check that the shuffle masks match.
+ for (unsigned j = 0; j != MaskStart; ++j)
+ ShuffleMask[j] = MaskStart + j;
+ // Fill the rest of the mask with -1 for undef.
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
+
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+ if (!matchMask(ShuffleMask, Mask))
+ return false;
+
+ RdxOp = NextRdxOp;
+ NumVecElemsRemain /= 2;
+ MaskStart *= 2;
+ }
+
+ Opcode = RdxOpcode;
+ Ty = VecTy;
+ return true;
+}
+
unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
if (!TTI)
return -1;
@@ -189,6 +449,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
+
+ // Try to match a reduction sequence (a series of shufflevector and vector
+ // adds followed by an extractelement).
+ unsigned ReduxOpCode;
+ Type *ReduxType;
+
+ if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
+ else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
+
return TTI->getVectorInstrCost(I->getOpcode(),
EEI->getOperand(0)->getType(), Idx);
}
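As a standalone illustration of the masks matchPairwiseShuffleMask builds (plain C++ mirroring the loop above; level 0 is the reduction step closest to the extractelement):

#include <vector>

// Expected shuffle mask for one side of a pairwise reduction step over
// NumElts lanes; -1 marks an undef lane.
static std::vector<int> expectedMask(unsigned NumElts, unsigned Level,
                                     bool IsLeft) {
  std::vector<int> Mask(NumElts, -1);
  for (unsigned i = 0, e = 1u << Level, val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;
  return Mask;
}

// expectedMask(4, 0, true)  == {0, -1, -1, -1}   (last step, left operand)
// expectedMask(4, 0, false) == {1, -1, -1, -1}   (last step, right operand)
// expectedMask(4, 1, true)  == {0,  2, -1, -1}   (first step, left operand)
// expectedMask(4, 1, false) == {1,  3, -1, -1}   (first step, right operand)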
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
new file mode 100644
index 0000000..3ed0609
--- /dev/null
+++ b/lib/Analysis/Delinearization.cpp
@@ -0,0 +1,133 @@
+//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements an analysis pass that tries to delinearize all GEP
+// instructions in all loops using the SCEV analysis functionality. This pass is
+// only used for testing purposes: if your pass needs delinearization, please
+// use the on-demand SCEVAddRecExpr::delinearize() function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DL_NAME "delinearize"
+#define DEBUG_TYPE DL_NAME
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class Delinearization : public FunctionPass {
+ Delinearization(const Delinearization &); // do not implement
+protected:
+ Function *F;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ Delinearization() : FunctionPass(ID) {
+ initializeDelinearizationPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void print(raw_ostream &O, const Module *M = 0) const;
+};
+
+} // end anonymous namespace
+
+void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+}
+
+bool Delinearization::runOnFunction(Function &F) {
+ this->F = &F;
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ return false;
+}
+
+static Value *getPointerOperand(Instruction &Inst) {
+ if (LoadInst *Load = dyn_cast<LoadInst>(&Inst))
+ return Load->getPointerOperand();
+ else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst))
+ return Store->getPointerOperand();
+ else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst))
+ return Gep->getPointerOperand();
+ return NULL;
+}
+
+void Delinearization::print(raw_ostream &O, const Module *) const {
+ O << "Delinearization on function " << F->getName() << ":\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &(*I);
+
+ // Only analyze loads and stores.
+ if (!isa<StoreInst>(Inst) && !isa<LoadInst>(Inst) &&
+ !isa<GetElementPtrInst>(Inst))
+ continue;
+
+ const BasicBlock *BB = Inst->getParent();
+ // Delinearize the memory access as analyzed in all the surrounding loops.
+ // Do not analyze memory accesses outside loops.
+ for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) {
+ const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
+
+ // Do not try to delinearize memory accesses that are not AddRecs.
+ if (!AR)
+ break;
+
+ O << "AddRec: " << *AR << "\n";
+
+ SmallVector<const SCEV *, 3> Subscripts, Sizes;
+ const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes);
+ int Size = Subscripts.size();
+ if (Res == AR || Size == 0) {
+ O << "failed to delinearize\n";
+ continue;
+ }
+ O << "Base offset: " << *Res << "\n";
+ O << "ArrayDecl[UnknownSize]";
+ for (int i = 0; i < Size - 1; i++)
+ O << "[" << *Sizes[i] << "]";
+ O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
+
+ O << "ArrayRef";
+ for (int i = 0; i < Size; i++)
+ O << "[" << *Subscripts[i] << "]";
+ O << "\n";
+ }
+ }
+}
+
+char Delinearization::ID = 0;
+static const char delinearization_name[] = "Delinearization";
+INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
+ true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
+
+FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
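For callers that want delinearization outside this test pass, a hedged sketch of the on-demand interface the header comment points to, using only the calls that appear in Delinearization::print() above (the helper name is invented):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// Given the SCEV of a pointer access, try to recover per-dimension subscripts
// and sizes of the underlying array.
static bool delinearizeAccess(ScalarEvolution &SE, const SCEV *AccessFn,
                              SmallVectorImpl<const SCEV *> &Subscripts,
                              SmallVectorImpl<const SCEV *> &Sizes) {
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
  if (!AR)
    return false;                          // only AddRecs are delinearized
  const SCEV *Rem = AR->delinearize(SE, Subscripts, Sizes);
  return Rem != AR && !Subscripts.empty(); // same failure check as print()
}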
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index a0f1a69..3b3e2ef 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -24,11 +24,11 @@
// Both of these are conservative weaknesses;
// that is, not a source of correctness problems.
//
-// The implementation depends on the GEP instruction to
-// differentiate subscripts. Since Clang linearizes subscripts
-// for most arrays, we give up some precision (though the existing MIV tests
-// will help). We trust that the GEP instruction will eventually be extended.
-// In the meantime, we should explore Maslov's ideas about delinearization.
+// The implementation depends on the GEP instruction to differentiate
+// subscripts. Since Clang linearizes some array subscripts, the dependence
+// analysis uses SCEV->delinearize to recover the representation of multiple
+// subscripts, and thus avoids the more expensive and less precise MIV tests. The
+// delinearization is controlled by the flag -da-delinearize.
//
// We should pay some careful attention to the possibility of integer overflow
// in the implementation of the various tests. This could happen with Add,
@@ -61,6 +61,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
@@ -104,6 +105,10 @@ STATISTIC(BanerjeeApplications, "Banerjee applications");
STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+static cl::opt<bool>
+Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Try to delinearize array references."));
+
//===----------------------------------------------------------------------===//
// basics
@@ -3171,6 +3176,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
llvm_unreachable("constraint has unexpected kind");
}
+/// Check if we can delinearize the subscripts. If the SCEVs representing the
+/// source and destination array references are recurrences on a nested loop,
+/// this function flattens the nested recurrences into separate recurrences
+/// for each loop level.
+bool
+DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
+ SmallVectorImpl<Subscript> &Pair) const {
+ const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
+ const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
+ if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
+ return false;
+
+ SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
+ SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
+ DstAR->delinearize(*SE, DstSubscripts, DstSizes);
+
+ int size = SrcSubscripts.size();
+ int dstSize = DstSubscripts.size();
+ if (size != dstSize || size < 2)
+ return false;
+
+#ifndef NDEBUG
+ DEBUG(errs() << "\nSrcSubscripts: ");
+ for (int i = 0; i < size; i++)
+ DEBUG(errs() << *SrcSubscripts[i]);
+ DEBUG(errs() << "\nDstSubscripts: ");
+ for (int i = 0; i < size; i++)
+ DEBUG(errs() << *DstSubscripts[i]);
+#endif
+
+ // The delinearization transforms a single-subscript MIV dependence test into
+ // a multi-subscript SIV dependence test that is easier to compute. So we
+ // resize Pair to contain as many pairs of subscripts as the delinearization
+ // has found, and then initialize the pairs following the delinearization.
+ Pair.resize(size);
+ for (int i = 0; i < size; ++i) {
+ Pair[i].Src = SrcSubscripts[i];
+ Pair[i].Dst = DstSubscripts[i];
+
+ // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
+ // delinearization has found, and add these constraints to the dependence
+ // check to avoid memory accesses overflow from one dimension into another.
+ // This is related to the problem of determining the existence of data
+ // dependences in array accesses using a different number of subscripts: in
+ // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc.
+ }
+
+ return true;
+}
//===----------------------------------------------------------------------===//
@@ -3280,6 +3334,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
Pair[0].Dst = DstSCEV;
}
+ if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+ DEBUG(dbgs() << " delinearized GEP\n");
+ Pairs = Pair.size();
+ }
+
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
@@ -3698,6 +3758,12 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
Pair[0].Dst = DstSCEV;
}
+ if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+ DEBUG(dbgs() << " delinearized GEP\n");
+ Pairs = Pair.size();
+ }
+
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
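Illustrative only: the kind of nested access -da-delinearize is meant to help with. Clang lowers the subscripts of 'a' into one linearized GEP index; delinearization recovers one subscript per loop level, so the dependence can be tested with cheap SIV tests per dimension instead of a single, less precise MIV test.

void copy_rows(double a[100][100]) {
  for (int i = 1; i < 100; ++i)
    for (int j = 0; j < 100; ++j)
      a[i][j] = a[i - 1][j];   // dependence carried by the outer loop only
}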
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 7620fd9..f042964 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -6,11 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the CallGraph class and provides the BasicCallGraph
-// default implementation.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Instructions.h"
@@ -21,168 +16,92 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
+CallGraph::CallGraph()
+ : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeCallGraphPass(*PassRegistry::getPassRegistry());
+}
-//===----------------------------------------------------------------------===//
-// BasicCallGraph class definition
-//
-class BasicCallGraph : public ModulePass, public CallGraph {
- // Root is root of the call graph, or the external node if a 'main' function
- // couldn't be found.
- //
- CallGraphNode *Root;
-
- // ExternalCallingNode - This node has edges to all external functions and
- // those internal functions that have their address taken.
- CallGraphNode *ExternalCallingNode;
-
- // CallsExternalNode - This node has edges to it from all functions making
- // indirect calls or calling an external function.
- CallGraphNode *CallsExternalNode;
-
-public:
- static char ID; // Class identification, replacement for typeinfo
- BasicCallGraph() : ModulePass(ID), Root(0),
- ExternalCallingNode(0), CallsExternalNode(0) {
- initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
- }
+void CallGraph::addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
- // runOnModule - Compute the call graph for the specified module.
- virtual bool runOnModule(Module &M) {
- CallGraph::initialize(M);
-
- ExternalCallingNode = getOrInsertFunction(0);
- CallsExternalNode = new CallGraphNode(0);
- Root = 0;
-
- // Add every function to the call graph.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- addToCallGraph(I);
-
- // If we didn't find a main function, use the external call graph node
- if (Root == 0) Root = ExternalCallingNode;
-
- return false;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
- virtual void print(raw_ostream &OS, const Module *) const {
- OS << "CallGraph Root is: ";
- if (Function *F = getRoot()->getFunction())
- OS << F->getName() << "\n";
- else {
- OS << "<<null function: 0x" << getRoot() << ">>\n";
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
}
-
- CallGraph::print(OS, 0);
}
- virtual void releaseMemory() {
- destroy();
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it should
- /// override this to adjust the this pointer as needed for the specified pass
- /// info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &CallGraph::ID)
- return (CallGraph*)this;
- return this;
- }
-
- CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
- CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
-
- // getRoot - Return the root of the call graph, which is either main, or if
- // main cannot be found, the external node.
- //
- CallGraphNode *getRoot() { return Root; }
- const CallGraphNode *getRoot() const { return Root; }
-
-private:
- //===---------------------------------------------------------------------
- // Implementation of CallGraph construction
- //
-
- // addToCallGraph - Add a function to the call graph, and link the node to all
- // of the functions that it calls.
- //
- void addToCallGraph(Function *F) {
- CallGraphNode *Node = getOrInsertFunction(F);
-
- // If this function has external linkage, anything could call it.
- if (!F->hasLocalLinkage()) {
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // Found the entry point?
- if (F->getName() == "main") {
- if (Root) // Found multiple external mains? Don't pick one.
- Root = ExternalCallingNode;
- else
- Root = Node; // Found a main, keep track of it!
+ // If this function has its address taken, anything could call it.
+ if (F->hasAddressTaken())
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS) {
+ const Function *Callee = CS.getCalledFunction();
+ if (!Callee)
+ // Indirect calls of intrinsics are not allowed so no need to check.
+ Node->addCalledFunction(CS, CallsExternalNode);
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
}
}
+}
- // If this function has its address taken, anything could call it.
- if (F->hasAddressTaken())
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // If this function is not defined in this translation unit, it could call
- // anything.
- if (F->isDeclaration() && !F->isIntrinsic())
- Node->addCalledFunction(CallSite(), CallsExternalNode);
-
- // Look for calls by this function.
- for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
- II != IE; ++II) {
- CallSite CS(cast<Value>(II));
- if (CS) {
- const Function *Callee = CS.getCalledFunction();
- if (!Callee)
- // Indirect calls of intrinsics are not allowed so no need to check.
- Node->addCalledFunction(CS, CallsExternalNode);
- else if (!Callee->isIntrinsic())
- Node->addCalledFunction(CS, getOrInsertFunction(Callee));
- }
- }
- }
+void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
- //
- // destroy - Release memory for the call graph
- virtual void destroy() {
- /// CallsExternalNode is not in the function map, delete it explicitly.
- if (CallsExternalNode) {
- CallsExternalNode->allReferencesDropped();
- delete CallsExternalNode;
- CallsExternalNode = 0;
- }
- CallGraph::destroy();
- }
-};
+bool CallGraph::runOnModule(Module &M) {
+ Mod = &M;
-} //End anonymous namespace
+ ExternalCallingNode = getOrInsertFunction(0);
+ assert(!CallsExternalNode);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
-INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
-INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
- "Basic CallGraph Construction", false, true, true)
+ // Add every function to the call graph.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
-char CallGraph::ID = 0;
-char BasicCallGraph::ID = 0;
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0)
+ Root = ExternalCallingNode;
-void CallGraph::initialize(Module &M) {
- Mod = &M;
+ return false;
}
-void CallGraph::destroy() {
- if (FunctionMap.empty()) return;
-
- // Reset all node's use counts to zero before deleting them to prevent an
- // assertion from firing.
+INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true)
+
+char CallGraph::ID = 0;
+
+void CallGraph::releaseMemory() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
+
+ if (FunctionMap.empty())
+ return;
+
+// Reset all nodes' use counts to zero before deleting them to prevent an
+// assertion from firing.
#ifndef NDEBUG
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
@@ -195,7 +114,14 @@ void CallGraph::destroy() {
FunctionMap.clear();
}
-void CallGraph::print(raw_ostream &OS, Module*) const {
+void CallGraph::print(raw_ostream &OS, const Module*) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = Root->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << Root << ">>\n";
+ }
+
for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
I->second->print(OS);
}
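
A rough standalone sketch of the invariant the rewritten addToCallGraph maintains (a simplified model under stated assumptions, not the CallGraph API itself): externally visible functions get a conservative edge from the synthetic external-calling node, and bodiless declarations get a conservative edge to the calls-external node.

```cpp
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Hypothetical mini-model of the two synthetic nodes the pass keeps.
struct MiniCallGraph {
  std::map<std::string, std::vector<std::string> > Edges;
  void addFunction(const std::string &Name, bool ExternallyVisible,
                   bool IsDeclaration) {
    if (ExternallyVisible)
      Edges["<external calling>"].push_back(Name); // anything could call it
    if (IsDeclaration)
      Edges[Name].push_back("<calls external>");   // it could call anything
  }
};

int main() {
  MiniCallGraph CG;
  CG.addFunction("main", /*ExternallyVisible=*/true, /*IsDeclaration=*/false);
  CG.addFunction("printf", /*ExternallyVisible=*/true, /*IsDeclaration=*/true);
  for (std::map<std::string, std::vector<std::string> >::iterator
           I = CG.Edges.begin(), E = CG.Edges.end(); I != E; ++I)
    for (unsigned J = 0; J != I->second.size(); ++J)
      std::cout << I->first << " -> " << I->second[J] << "\n";
}
```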
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index a0d788f..182beca 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -22,7 +22,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/PassManagers.h"
+#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 92d0d23..7ec4644 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -189,7 +189,7 @@ char GlobalsModRef::ID = 0;
INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index 1c1816d..47357cf 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -19,8 +19,7 @@ using namespace llvm;
/// initializeIPA - Initialize all passes linked into the IPA library.
void llvm::initializeIPA(PassRegistry &Registry) {
- initializeBasicCallGraphPass(Registry);
- initializeCallGraphAnalysisGroup(Registry);
+ initializeCallGraphPass(Registry);
initializeCallGraphPrinterPass(Registry);
initializeCallGraphViewerPass(Registry);
initializeFindUsedTypesPass(Registry);
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 37d73a8..3bc796e 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
bool ContainsNoDuplicateCall;
+ bool HasReturn;
+ bool HasIndirectBr;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
bool visitCallSite(CallSite CS);
+ bool visitReturnInst(ReturnInst &RI);
+ bool visitBranchInst(BranchInst &BI);
+ bool visitSwitchInst(SwitchInst &SI);
+ bool visitIndirectBrInst(IndirectBrInst &IBI);
+ bool visitResumeInst(ResumeInst &RI);
+ bool visitUnreachableInst(UnreachableInst &I);
public:
CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
@@ -139,12 +147,13 @@ public:
: TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -704,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
}
bool CallAnalyzer::visitCallSite(CallSite CS) {
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
@@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
return Base::visitCallSite(CS);
}
+bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
+ // At least one return instruction will be free after inlining.
+ bool Free = !HasReturn;
+ HasReturn = true;
+ return Free;
+}
+
+bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
+ // We model unconditional branches as essentially free -- they really
+ // shouldn't exist at all, but handling them makes the behavior of the
+ // inliner more regular and predictable. Interestingly, conditional branches
+ // which will fold away are also free.
+ return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(BI.getCondition()));
+}
+
+bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
+ // We model switches whose condition folds to a constant as free; see the
+ // comments on handling branches.
+ return isa<ConstantInt>(SI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(SI.getCondition()));
+}
+
+bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
+ // We never want to inline functions that contain an indirectbr. This is
+ // incorrect because all the blockaddresses (in static global initializers,
+ // for example) would be referring to the original function, and this
+ // indirect jump would jump from the inlined copy of the function into the
+ // original function, which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions with
+ // indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably don't
+ // want to inline this function.
+ HasIndirectBr = true;
+ return false;
+}
+
+bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a resume instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
+ // FIXME: It might be reasonable to discount the cost of instructions leading
+ // to unreachable as they have the lowest possible impact on both runtime and
+ // code size.
+ return true; // No actual code is needed for unreachable.
+}
+
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
@@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
- for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
- I != E; ++I) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
Cost += InlineConstants::InstrCost;
// If visiting this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
- // Track whether we've seen a return instruction. The first return
- // instruction is free, as at least one will usually disappear in inlining.
- bool HasReturn = false;
-
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
CallSite::arg_iterator CAI = CS.arg_begin();
@@ -1039,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
- // Handle the terminator cost here where we can track returns and other
- // function-wide constructs.
- TerminatorInst *TI = BB->getTerminator();
-
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this
- // indirect jump would jump from the inlined copy of the function into the
- // original function which is extremely undefined behavior.
- // FIXME: This logic isn't really right; we can safely inline functions
- // with indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably
- // don't want to inline this function.
- if (isa<IndirectBrInst>(TI))
- return false;
-
- if (!HasReturn && isa<ReturnInst>(TI))
- HasReturn = true;
- else
- Cost += InlineConstants::InstrCost;
-
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail out.
if (!analyzeBlock(BB)) {
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
break;
}
+ TerminatorInst *TI = BB->getTerminator();
+
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -1171,6 +1210,22 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
return getInlineCost(CS, CS.getCalledFunction(), Threshold);
}
+/// \brief Test whether two functions either both have or both lack the given
+/// attribute.
+static bool attributeMatches(Function *F1, Function *F2,
+ Attribute::AttrKind Attr) {
+ return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
+}
+
+/// \brief Test that there are no attribute conflicts between Caller and Callee
+/// that prevent inlining.
+static bool functionsHaveCompatibleAttributes(Function *Caller,
+ Function *Callee) {
+ return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeThread);
+}
+
InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
int Threshold) {
// Cannot inline indirect calls.
@@ -1179,20 +1234,26 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
- if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline)) {
+ if (Callee->hasFnAttribute(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
return llvm::InlineCost::getAlways();
return llvm::InlineCost::getNever();
}
+ // Never inline functions with conflicting attributes (unless callee has
+ // always-inline attribute).
+ if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
+ return llvm::InlineCost::getNever();
+
+ // Don't inline this call if the caller has the optnone attribute.
+ if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
+ return llvm::InlineCost::getNever();
+
// Don't inline functions which can be redefined at link-time to mean
// something else. Don't inline functions marked noinline or call sites
// marked noinline.
if (Callee->mayBeOverridden() ||
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoInline) ||
- CS.isNoInline())
+ Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
return llvm::InlineCost::getNever();
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
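
A minimal sketch of the compatibility rule added above, under the assumption that its purpose is to keep sanitizer instrumentation consistent across an inlined call boundary; the enum and helper below are hypothetical stand-ins, not LLVM's Attribute machinery.

```cpp
#include <bitset>
#include <iostream>

// Hypothetical attribute kinds; only the "must match on both sides" rule
// mirrors functionsHaveCompatibleAttributes().
enum SanitizerAttr { SanitizeAddress, SanitizeMemory, SanitizeThread, NumAttrs };

static bool functionsCompatible(std::bitset<NumAttrs> Caller,
                                std::bitset<NumAttrs> Callee) {
  // Every sanitizer attribute must match, mirroring attributeMatches().
  return Caller == Callee;
}

int main() {
  std::bitset<NumAttrs> Caller, Callee;
  Caller.set(SanitizeAddress); // caller built with ASan, callee without
  std::cout << functionsCompatible(Caller, Callee) << "\n"; // 0: never inline
  Callee.set(SanitizeAddress);
  std::cout << functionsCompatible(Caller, Callee) << "\n"; // 1: cost model decides
}
```

Mixing instrumented and uninstrumented code inside one function would give the sanitizer runtime an inconsistent view of that function, which is why a mismatch returns getNever() regardless of cost.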
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b275dfe..b867af1 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -668,7 +668,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
/// folding.
static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
- Value *&V) {
+ Value *&V,
+ bool AllowNonInbounds = false) {
assert(V->getType()->getScalarType()->isPointerTy());
// Without DataLayout, just be conservative for now. Theoretically, more could
@@ -685,7 +686,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset))
+ if ((!AllowNonInbounds && !GEP->isInBounds()) ||
+ !GEP->accumulateConstantOffset(*TD, Offset))
break;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -1737,7 +1739,7 @@ static Constant *computePointerICmp(const DataLayout *TD,
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
- if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) &&
+ if (llvm::isKnownNonNull(LHS, TLI) && isa<ConstantPointerNull>(RHS) &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
@@ -1837,6 +1839,17 @@ static Constant *computePointerICmp(const DataLayout *TD,
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
}
+
+ // Even if a non-inbounds GEP occurs along the path we can still optimize
+ // equality comparisons concerning the result. We avoid walking the whole
+ // chain again by starting where the last calls to
+ // stripAndComputeConstantOffsets left off and accumulating the offsets.
+ Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true);
+ Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true);
+ if (LHS == RHS)
+ return ConstantExpr::getICmp(Pred,
+ ConstantExpr::getAdd(LHSOffset, LHSNoBound),
+ ConstantExpr::getAdd(RHSOffset, RHSNoBound));
}
// Otherwise, fail.
@@ -2946,6 +2959,7 @@ static bool IsIdempotent(Intrinsic::ID ID) {
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
+ case Intrinsic::round:
return true;
}
}
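
A hypothetical restatement of the folding the new AllowNonInbounds path enables: once both pointers strip down to the same underlying object, an equality or inequality predicate depends only on the accumulated byte offsets, so whether the GEPs were inbounds is irrelevant for EQ/NE. The struct and helper below are illustrative only.

```cpp
#include <cstdint>
#include <iostream>

// Hypothetical result of stripping a pointer to its base plus a constant
// byte offset, whether or not the GEPs along the way were inbounds.
struct StrippedPtr {
  const void *Base;
  int64_t Offset;
};

static bool foldEquality(const StrippedPtr &L, const StrippedPtr &R,
                         bool &Result) {
  if (L.Base != R.Base)
    return false;                    // different bases: cannot fold here
  Result = (L.Offset == R.Offset);   // same base: compare offsets only
  return true;
}

int main() {
  char Buf[16];
  StrippedPtr P = {Buf, 4};  // e.g. a non-inbounds GEP 4 bytes into Buf
  StrippedPtr Q = {Buf, 8};  // another GEP 8 bytes into Buf
  bool Eq;
  if (foldEquality(P, Q, Eq))
    std::cout << "p == q folds to " << Eq << "\n"; // folds to 0 (false)
}
```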
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 9393508..ec17f47 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -207,7 +207,7 @@ void Lint::visitCallSite(CallSite CS) {
&I);
FunctionType *FT = F->getFunctionType();
- unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumActualArgs = CS.arg_size();
Assert1(FT->isVarArg() ?
FT->getNumParams() <= NumActualArgs :
@@ -504,14 +504,42 @@ void Lint::visitShl(BinaryOperator &I) {
"Undefined result: Shift count out of range", &I);
}
-static bool isZero(Value *V, DataLayout *TD) {
+static bool isZero(Value *V, DataLayout *DL) {
// Assume undef could be zero.
- if (isa<UndefValue>(V)) return true;
+ if (isa<UndefValue>(V))
+ return true;
+
+ VectorType *VecTy = dyn_cast<VectorType>(V->getType());
+ if (!VecTy) {
+ unsigned BitWidth = V->getType()->getIntegerBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+ return KnownZero.isAllOnesValue();
+ }
+
+ // Per-component check doesn't work with zeroinitializer
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (C->isZeroValue())
+ return true;
+
+ // For a vector, KnownZero will only be true if all values are zero, so check
+ // this per component
+ unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
+ for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
+ Constant *Elem = C->getAggregateElement(I);
+ if (isa<UndefValue>(Elem))
+ return true;
+
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Elem, KnownZero, KnownOne, DL);
+ if (KnownZero.isAllOnesValue())
+ return true;
+ }
- unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, TD);
- return KnownZero.isAllOnesValue();
+ return false;
}
void Lint::visitSDiv(BinaryOperator &I) {
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 142ebed..e369633 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -177,10 +177,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool Loop::isLCSSAForm(DominatorTree &DT) const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
-
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
BasicBlock *BB = *BI;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
@@ -196,7 +192,7 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
// block they are defined in. Also, blocks not reachable from the
// entry are special; uses in them don't need to go through PHIs.
if (UserBB != BB &&
- !LoopBBs.count(UserBB) &&
+ !contains(UserBB) &&
DT.isReachableFromEntry(UserBB))
return false;
}
@@ -220,12 +216,12 @@ bool Loop::isSafeToClone() const {
// Return false if any loop blocks contain indirectbrs, or there are any calls
// to noduplicate functions.
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
- if (isa<IndirectBrInst>((*I)->getTerminator())) {
+ if (isa<IndirectBrInst>((*I)->getTerminator()))
return false;
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
+
+ if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
if (II->hasFnAttr(Attribute::NoDuplicate))
return false;
- }
for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
@@ -309,15 +305,15 @@ bool Loop::isAnnotatedParallel() const {
if (!II->mayReadOrWriteMemory())
continue;
- if (!II->getMetadata("llvm.mem.parallel_loop_access"))
- return false;
-
// The memory instruction can refer to the loop identifier metadata
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
// itself so we can check both cases with the same routine.
- MDNode *loopIdMD =
- dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access"));
+ MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access");
+
+ if (!loopIdMD)
+ return false;
+
bool loopIdMDFound = false;
for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) {
if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) {
@@ -337,9 +333,6 @@ bool Loop::isAnnotatedParallel() const {
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool Loop::hasDedicatedExits() const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
// Each predecessor of each exit block of a normal loop is contained
// within the loop.
SmallVector<BasicBlock *, 4> ExitBlocks;
@@ -347,7 +340,7 @@ bool Loop::hasDedicatedExits() const {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
for (pred_iterator PI = pred_begin(ExitBlocks[i]),
PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
- if (!LoopBBs.count(*PI))
+ if (!contains(*PI))
return false;
// All the requirements are met.
return true;
@@ -362,11 +355,6 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
assert(hasDedicatedExits() &&
"getUniqueExitBlocks assumes the loop has canonical form exits!");
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
- std::sort(LoopBBs.begin(), LoopBBs.end());
-
SmallVector<BasicBlock *, 32> switchExitBlocks;
for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
@@ -376,7 +364,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
// If block is inside the loop then it is not an exit block.
- if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ if (contains(*I))
continue;
pred_iterator PI = pred_begin(*I);
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 0f0a1c9..1db0f63 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -31,12 +31,13 @@
using namespace llvm;
enum AllocType {
- MallocLike = 1<<0, // allocates
- CallocLike = 1<<1, // allocates + bzero
- ReallocLike = 1<<2, // reallocates
- StrDupLike = 1<<3,
+ OpNewLike = 1<<0, // allocates; never returns null
+ MallocLike = 1<<1 | OpNewLike, // allocates; may return null
+ CallocLike = 1<<2, // allocates + bzero
+ ReallocLike = 1<<3, // reallocates
+ StrDupLike = 1<<4,
AllocLike = MallocLike | CallocLike | StrDupLike,
- AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike
+ AnyAlloc = AllocLike | ReallocLike
};
struct AllocFnsTy {
@@ -52,20 +53,20 @@ struct AllocFnsTy {
static const AllocFnsTy AllocationFnData[] = {
{LibFunc::malloc, MallocLike, 1, 0, -1},
{LibFunc::valloc, MallocLike, 1, 0, -1},
- {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::Znwj, OpNewLike, 1, 0, -1}, // new(unsigned int)
{LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
- {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long)
+ {LibFunc::Znwm, OpNewLike, 1, 0, -1}, // new(unsigned long)
{LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow)
- {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::Znaj, OpNewLike, 1, 0, -1}, // new[](unsigned int)
{LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
- {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long)
+ {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long)
{LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow)
- {LibFunc::posix_memalign, MallocLike, 3, 2, -1},
{LibFunc::calloc, CallocLike, 2, 0, 1},
{LibFunc::realloc, ReallocLike, 2, 1, -1},
{LibFunc::reallocf, ReallocLike, 2, 1, -1},
{LibFunc::strdup, StrDupLike, 1, -1, -1},
{LibFunc::strndup, StrDupLike, 2, 1, -1}
+ // TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
@@ -117,7 +118,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
return 0;
const AllocFnsTy *FnData = &AllocationFnData[i];
- if ((FnData->AllocTy & AllocTy) == 0)
+ if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
return 0;
// Check function prototype.
@@ -189,6 +190,13 @@ bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast);
}
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates memory and never returns null (such as operator new).
+bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast);
+}
+
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
@@ -197,7 +205,7 @@ const CallInst *llvm::extractMallocCall(const Value *I,
return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0;
}
-static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
+static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
@@ -205,12 +213,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
- if (!T || !T->isSized() || !TD)
+ if (!T || !T->isSized() || !DL)
return 0;
- unsigned ElementSize = TD->getTypeAllocSize(T);
+ unsigned ElementSize = DL->getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
- ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+ ElementSize = DL->getStructLayout(ST)->getSizeInBytes();
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
@@ -227,10 +235,10 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
const CallInst *llvm::isArrayMalloc(const Value *I,
- const DataLayout *TD,
+ const DataLayout *DL,
const TargetLibraryInfo *TLI) {
const CallInst *CI = extractMallocCall(I, TLI);
- Value *ArraySize = computeArraySize(CI, TD, TLI);
+ Value *ArraySize = computeArraySize(CI, DL, TLI);
if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
if (ConstSize->isOne())
@@ -288,11 +296,11 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *TD,
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt) {
assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
- return computeArraySize(CI, TD, TLI, LookThroughSExt);
+ return computeArraySize(CI, DL, TLI, LookThroughSExt);
}
@@ -354,12 +362,12 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
-bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL,
const TargetLibraryInfo *TLI, bool RoundToAlign) {
- if (!TD)
+ if (!DL)
return false;
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
return false;
@@ -386,12 +394,12 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
return Size;
}
-ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
bool RoundToAlign)
-: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) {
- IntegerType *IntTy = TD->getIntPtrType(Context);
+: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+ IntegerType *IntTy = DL->getIntPtrType(Context);
IntTyBits = IntTy->getBitWidth();
Zero = APInt::getNullValue(IntTyBits);
}
@@ -434,7 +442,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
- APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlignment()), Zero);
@@ -453,7 +461,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
return unknown();
}
PointerType *PT = cast<PointerType>(A.getType());
- APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType()));
return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
@@ -526,7 +534,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
APInt Offset(IntTyBits, 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset))
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset))
return unknown();
return std::make_pair(PtrData.first, PtrData.second + Offset);
@@ -542,7 +550,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -578,12 +586,13 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
return unknown();
}
-
-ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD,
- const TargetLibraryInfo *TLI,
- LLVMContext &Context)
-: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) {
- IntTy = TD->getIntPtrType(Context);
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ LLVMContext &Context,
+ bool RoundToAlign)
+: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+ RoundToAlign(RoundToAlign) {
+ IntTy = DL->getIntPtrType(Context);
Zero = ConstantInt::get(IntTy, 0);
}
@@ -607,7 +616,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Context);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
@@ -626,13 +635,15 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I);
- // record the pointers that were handled in this run, so that they can be
- // cleaned later if something fails
- SeenVals.insert(V);
-
// now compute the size and offset
SizeOffsetEvalType Result;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+
+ // Record the pointers that were handled in this run, so that they can be
+ // cleaned later if something fails. We also use this set to break cycles that
+ // can occur in dead code.
+ if (!SeenVals.insert(V)) {
+ Result = unknown();
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
Result = visitGEPOperator(*GEP);
} else if (Instruction *I = dyn_cast<Instruction>(V)) {
Result = visit(*I);
@@ -665,7 +676,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
assert(I.isArrayAllocation());
Value *ArraySize = I.getArraySize();
Value *Size = ConstantInt::get(ArraySize->getType(),
- TD->getTypeAllocSize(I.getAllocatedType()));
+ DL->getTypeAllocSize(I.getAllocatedType()));
Size = Builder.CreateMul(Size, ArraySize);
return std::make_pair(Size, Zero);
}
@@ -717,7 +728,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
if (!bothKnown(PtrData))
return unknown();
- Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP, /*NoAssumptions=*/true);
+ Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true);
Offset = Builder.CreateAdd(PtrData.second, Offset);
return std::make_pair(PtrData.first, Offset);
}
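
A small self-contained restatement of the new AllocType encoding and the subset check in getAllocationData (the numeric values are illustrative; only the bit relationships matter): because the OpNewLike bit is folded into MallocLike, a MallocLike query still matches operator new, while an OpNewLike query rejects plain malloc, which may return null.

```cpp
#include <iostream>

// Hypothetical mirror of the flag scheme above.
enum AllocType {
  OpNewLike  = 1 << 0,               // allocates; never returns null
  MallocLike = (1 << 1) | OpNewLike, // allocates; may return null
  CallocLike = 1 << 2
};

// Mirrors the new check: the function's own flags must be a subset of the
// queried category for it to match.
static bool matchesQuery(unsigned FnFlags, unsigned Query) {
  return (FnFlags & Query) == FnFlags;
}

int main() {
  std::cout << matchesQuery(OpNewLike, MallocLike) << "\n"; // 1: new is malloc-like
  std::cout << matchesQuery(MallocLike, OpNewLike) << "\n"; // 0: malloc may be null
}
```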
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index fe1c874..84ff2ee 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -371,18 +371,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ Instruction *Inst = --ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ // Debug intrinsics don't (and can't) cause dependencies.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
--Limit;
if (!Limit)
return MemDepResult::getUnknown();
- Instruction *Inst = --ScanIt;
-
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Debug intrinsics don't (and can't) cause dependences.
- if (isa<DbgInfoIntrinsic>(II)) continue;
-
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
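
A hypothetical sketch of why the debug-intrinsic check moves above the limit decrement: instructions that are skipped outright should not spend the scan budget, so the amount of debug info in a block cannot change how far back the dependence scan reaches.

```cpp
#include <iostream>
#include <vector>

// Toy scan where 0 stands in for a debug intrinsic and 1 for a real
// instruction; skipped entries are free, only real ones spend the budget.
static int scanWithBudget(const std::vector<int> &Insts, int Limit) {
  int RealVisited = 0;
  for (size_t I = 0; I != Insts.size(); ++I) {
    if (Insts[I] == 0)  // debug intrinsic: skip without charging the limit
      continue;
    if (--Limit == 0)   // only real instructions consume the budget
      break;
    ++RealVisited;
  }
  return RealVisited;
}

int main() {
  std::vector<int> Block;
  int Raw[] = {1, 0, 0, 1, 1, 1};
  Block.assign(Raw, Raw + 6);
  // Reaches 2 real instructions; charging the debug entries against the
  // budget would have stopped the scan after only 1.
  std::cout << scanWithBudget(Block, 3) << "\n";
}
```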
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
deleted file mode 100644
index 30d213b..0000000
--- a/lib/Analysis/PathNumbering.cpp
+++ /dev/null
@@ -1,521 +0,0 @@
-//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Ball-Larus path numbers uniquely identify paths through a directed acyclic
-// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony
-// edges to obtain a DAG, and thus the unique path numbers [Ball96].
-//
-// The purpose of this analysis is to enumerate the edges in a CFG in order
-// to obtain paths from path numbers in a convenient manner. As described in
-// [Ball96] edges can be enumerated such that given a path number by following
-// the CFG and updating the path number, the path is obtained.
-//
-// [Ball96]
-// T. Ball and J. R. Larus. "Efficient Path Profiling."
-// International Symposium on Microarchitecture, pages 46-57, 1996.
-// http://portal.acm.org/citation.cfm?id=243857
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ball-larus-numbering"
-
-#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeBuilder.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <queue>
-#include <sstream>
-#include <stack>
-#include <string>
-#include <utility>
-
-using namespace llvm;
-
-// Are we enabling early termination
-static cl::opt<bool> ProcessEarlyTermination(
- "path-profile-early-termination", cl::Hidden,
- cl::desc("In path profiling, insert extra instrumentation to account for "
- "unexpected function termination."));
-
-// Returns the basic block for the BallLarusNode
-BasicBlock* BallLarusNode::getBlock() {
- return(_basicBlock);
-}
-
-// Returns the number of paths to the exit starting at the node.
-unsigned BallLarusNode::getNumberPaths() {
- return(_numberPaths);
-}
-
-// Sets the number of paths to the exit starting at the node.
-void BallLarusNode::setNumberPaths(unsigned numberPaths) {
- _numberPaths = numberPaths;
-}
-
-// Gets the NodeColor used in graph algorithms.
-BallLarusNode::NodeColor BallLarusNode::getColor() {
- return(_color);
-}
-
-// Sets the NodeColor used in graph algorithms.
-void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
- _color = color;
-}
-
-// Returns an iterator over predecessor edges. Includes phony and
-// backedges.
-BLEdgeIterator BallLarusNode::predBegin() {
- return(_predEdges.begin());
-}
-
-// Returns the end sentinel for the predecessor iterator.
-BLEdgeIterator BallLarusNode::predEnd() {
- return(_predEdges.end());
-}
-
-// Returns the number of predecessor edges. Includes phony and
-// backedges.
-unsigned BallLarusNode::getNumberPredEdges() {
- return(_predEdges.size());
-}
-
-// Returns an iterator over successor edges. Includes phony and
-// backedges.
-BLEdgeIterator BallLarusNode::succBegin() {
- return(_succEdges.begin());
-}
-
-// Returns the end sentinel for the successor iterator.
-BLEdgeIterator BallLarusNode::succEnd() {
- return(_succEdges.end());
-}
-
-// Returns the number of successor edges. Includes phony and
-// backedges.
-unsigned BallLarusNode::getNumberSuccEdges() {
- return(_succEdges.size());
-}
-
-// Add an edge to the predecessor list.
-void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
- _predEdges.push_back(edge);
-}
-
-// Remove an edge from the predecessor list.
-void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
- removeEdge(_predEdges, edge);
-}
-
-// Add an edge to the successor list.
-void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
- _succEdges.push_back(edge);
-}
-
-// Remove an edge from the successor list.
-void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
- removeEdge(_succEdges, edge);
-}
-
-// Returns the name of the BasicBlock being represented. If BasicBlock
-// is null then returns "<null>". If BasicBlock has no name, then
-// "<unnamed>" is returned. Intended for use with debug output.
-std::string BallLarusNode::getName() {
- std::stringstream name;
-
- if(getBlock() != NULL) {
- if(getBlock()->hasName()) {
- std::string tempName(getBlock()->getName());
- name << tempName.c_str() << " (" << _uid << ")";
- } else
- name << "<unnamed> (" << _uid << ")";
- } else
- name << "<null> (" << _uid << ")";
-
- return name.str();
-}
-
-// Removes an edge from an edgeVector. Used by removePredEdge and
-// removeSuccEdge.
-void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
- // TODO: Avoid linear scan by using a set instead
- for(BLEdgeIterator i = v.begin(),
- end = v.end();
- i != end;
- ++i) {
- if((*i) == e) {
- v.erase(i);
- break;
- }
- }
-}
-
-// Returns the source node of this edge.
-BallLarusNode* BallLarusEdge::getSource() const {
- return(_source);
-}
-
-// Returns the target node of this edge.
-BallLarusNode* BallLarusEdge::getTarget() const {
- return(_target);
-}
-
-// Sets the type of the edge.
-BallLarusEdge::EdgeType BallLarusEdge::getType() const {
- return _edgeType;
-}
-
-// Gets the type of the edge.
-void BallLarusEdge::setType(EdgeType type) {
- _edgeType = type;
-}
-
-// Returns the weight of this edge. Used to decode path numbers to sequences
-// of basic blocks.
-unsigned BallLarusEdge::getWeight() {
- return(_weight);
-}
-
-// Sets the weight of the edge. Used during path numbering.
-void BallLarusEdge::setWeight(unsigned weight) {
- _weight = weight;
-}
-
-// Gets the phony edge originating at the root.
-BallLarusEdge* BallLarusEdge::getPhonyRoot() {
- return _phonyRoot;
-}
-
-// Sets the phony edge originating at the root.
-void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
- _phonyRoot = phonyRoot;
-}
-
-// Gets the phony edge terminating at the exit.
-BallLarusEdge* BallLarusEdge::getPhonyExit() {
- return _phonyExit;
-}
-
-// Sets the phony edge terminating at the exit.
-void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
- _phonyExit = phonyExit;
-}
-
-// Gets the associated real edge if this is a phony edge.
-BallLarusEdge* BallLarusEdge::getRealEdge() {
- return _realEdge;
-}
-
-// Sets the associated real edge if this is a phony edge.
-void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
- _realEdge = realEdge;
-}
-
-// Returns the duplicate number of the edge.
-unsigned BallLarusEdge::getDuplicateNumber() {
- return(_duplicateNumber);
-}
-
-// Initialization that requires virtual functions which are not fully
-// functional in the constructor.
-void BallLarusDag::init() {
- BLBlockNodeMap inDag;
- std::stack<BallLarusNode*> dfsStack;
-
- _root = addNode(&(_function.getEntryBlock()));
- _exit = addNode(NULL);
-
- // start search from root
- dfsStack.push(getRoot());
-
- // dfs to add each bb into the dag
- while(dfsStack.size())
- buildNode(inDag, dfsStack);
-
- // put in the final edge
- addEdge(getExit(),getRoot(),0);
-}
-
-// Frees all memory associated with the DAG.
-BallLarusDag::~BallLarusDag() {
- for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
- ++edge)
- delete (*edge);
-
- for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
- ++node)
- delete (*node);
-}
-
-// Calculate the path numbers by assigning edge increments as prescribed
-// in Ball-Larus path profiling.
-void BallLarusDag::calculatePathNumbers() {
- BallLarusNode* node;
- std::queue<BallLarusNode*> bfsQueue;
- bfsQueue.push(getExit());
-
- while(bfsQueue.size() > 0) {
- node = bfsQueue.front();
-
- DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
-
- bfsQueue.pop();
- unsigned prevPathNumber = node->getNumberPaths();
- calculatePathNumbersFrom(node);
-
- // Check for DAG splitting
- if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
- // Add new phony edge from the split-node to the DAG's exit
- BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
- exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
-
- // Counters to handle the possibility of a multi-graph
- BasicBlock* oldTarget = 0;
- unsigned duplicateNumber = 0;
-
- // Iterate through each successor edge, adding phony edges
- for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
- succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
-
- if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
- // is this edge a duplicate?
- if( oldTarget != (*succ)->getTarget()->getBlock() )
- duplicateNumber = 0;
-
- // create the new phony edge: root -> succ
- BallLarusEdge* rootEdge =
- addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
- rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
- rootEdge->setRealEdge(*succ);
-
- // split on this edge and reference it's exit/root phony edges
- (*succ)->setType(BallLarusEdge::SPLITEDGE);
- (*succ)->setPhonyRoot(rootEdge);
- (*succ)->setPhonyExit(exitEdge);
- (*succ)->setWeight(0);
- }
- }
-
- calculatePathNumbersFrom(node);
- }
-
- DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
- << node->getNumberPaths() << ".\n");
-
- if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
- DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
- for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
- pred != end; pred++) {
- if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
- (*pred)->getType() == BallLarusEdge::SPLITEDGE )
- continue;
-
- BallLarusNode* nextNode = (*pred)->getSource();
- // not yet visited?
- if(nextNode->getNumberPaths() == 0)
- bfsQueue.push(nextNode);
- }
- }
- }
-
- DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
-}
-
-// Returns the number of paths for the Dag.
-unsigned BallLarusDag::getNumberOfPaths() {
- return(getRoot()->getNumberPaths());
-}
-
-// Returns the root (i.e. entry) node for the DAG.
-BallLarusNode* BallLarusDag::getRoot() {
- return _root;
-}
-
-// Returns the exit node for the DAG.
-BallLarusNode* BallLarusDag::getExit() {
- return _exit;
-}
-
-// Returns the function for the DAG.
-Function& BallLarusDag::getFunction() {
- return(_function);
-}
-
-// Clears the node colors.
-void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
- for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
- (*nodeIt)->setColor(color);
-}
-
-// Processes one node and its imediate edges for building the DAG.
-void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
- BallLarusNode* currentNode = dfsStack.top();
- BasicBlock* currentBlock = currentNode->getBlock();
-
- if(currentNode->getColor() != BallLarusNode::WHITE) {
- // we have already visited this node
- dfsStack.pop();
- currentNode->setColor(BallLarusNode::BLACK);
- } else {
- // are there any external procedure calls?
- if( ProcessEarlyTermination ) {
- for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
- bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
- bbCurrent++ ) {
- Instruction& instr = *bbCurrent;
- if( instr.getOpcode() == Instruction::Call ) {
- BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
- callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
- break;
- }
- }
- }
-
- TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
- if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) ||
- isa<ResumeInst>(terminator))
- addEdge(currentNode, getExit(),0);
-
- currentNode->setColor(BallLarusNode::GRAY);
- inDag[currentBlock] = currentNode;
-
- BasicBlock* oldSuccessor = 0;
- unsigned duplicateNumber = 0;
-
- // iterate through this node's successors
- for(succ_iterator successor = succ_begin(currentBlock),
- succEnd = succ_end(currentBlock); successor != succEnd;
- oldSuccessor = *successor, ++successor ) {
- BasicBlock* succBB = *successor;
-
- // is this edge a duplicate?
- if (oldSuccessor == succBB)
- duplicateNumber++;
- else
- duplicateNumber = 0;
-
- buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
- }
- }
-}
-
-// Process an edge in the CFG for DAG building.
-void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
- dfsStack, BallLarusNode* currentNode,
- BasicBlock* succBB, unsigned duplicateCount) {
- BallLarusNode* succNode = inDag[succBB];
-
- if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
- // visited node and forward edge
- addEdge(currentNode, succNode, duplicateCount);
- } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
- // visited node and back edge
- DEBUG(dbgs() << "Backedge detected.\n");
- addBackedge(currentNode, succNode, duplicateCount);
- } else {
- BallLarusNode* childNode;
- // not visited node and forward edge
- if(succNode) // an unvisited node that is child of a gray node
- childNode = succNode;
- else { // an unvisited node that is a child of a an unvisted node
- childNode = addNode(succBB);
- inDag[succBB] = childNode;
- }
- addEdge(currentNode, childNode, duplicateCount);
- dfsStack.push(childNode);
- }
-}
-
-// The weight on each edge is the increment required along any path that
-// contains that edge.
-void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
- if(node == getExit())
- // The Exit node must be base case
- node->setNumberPaths(1);
- else {
- unsigned sumPaths = 0;
- BallLarusNode* succNode;
-
- for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
- succ != end; succ++) {
- if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
- (*succ)->getType() == BallLarusEdge::SPLITEDGE )
- continue;
-
- (*succ)->setWeight(sumPaths);
- succNode = (*succ)->getTarget();
-
- if( !succNode->getNumberPaths() )
- return;
- sumPaths += succNode->getNumberPaths();
- }
-
- node->setNumberPaths(sumPaths);
- }
-}
-
-// Allows subclasses to determine which type of Node is created.
-// Override this method to produce subclasses of BallLarusNode if
-// necessary. The destructor of BallLarusDag will call free on each
-// pointer created.
-BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
- return( new BallLarusNode(BB) );
-}
-
-// Allows subclasses to determine which type of Edge is created.
-// Override this method to produce subclasses of BallLarusEdge if
-// necessary. The destructor of BallLarusDag will call free on each
-// pointer created.
-BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
- BallLarusNode* target,
- unsigned duplicateCount) {
- return( new BallLarusEdge(source, target, duplicateCount) );
-}
-
-// Proxy to node's constructor. Updates the DAG state.
-BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
- BallLarusNode* newNode = createNode(BB);
- _nodes.push_back(newNode);
- return( newNode );
-}
-
-// Proxy to edge's constructor. Updates the DAG state.
-BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
- BallLarusNode* target,
- unsigned duplicateCount) {
- BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
- _edges.push_back(newEdge);
- source->addSuccEdge(newEdge);
- target->addPredEdge(newEdge);
- return(newEdge);
-}
-
-// Adds a backedge with its phony edges. Updates the DAG state.
-void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
- unsigned duplicateCount) {
- BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
- childEdge->setType(BallLarusEdge::BACKEDGE);
-
- childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
- childEdge->setPhonyExit(addEdge(source, getExit(),0));
-
- childEdge->getPhonyRoot()->setRealEdge(childEdge);
- childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
-
- childEdge->getPhonyExit()->setRealEdge(childEdge);
- childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
- _backEdges.push_back(childEdge);
-}
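
A minimal standalone sketch of the Ball-Larus numbering that calculatePathNumbersFrom builds and that getNextEdge (in PathProfileInfo.cpp below) later decodes, assuming a toy adjacency-list DAG; the graph, node indices, and variable names are illustrative, not taken from the LLVM sources.

#include <cstdio>
#include <vector>

int main() {
  // Nodes: 0 = entry, 1, 2, 3 = exit; adjacency lists of a tiny DAG.
  std::vector<std::vector<int>> succ = {{1, 2}, {3}, {3}, {}};
  const int n = (int)succ.size(), exitNode = 3;

  std::vector<unsigned> numPaths(n, 0);
  // weight[v][i] is the Ball-Larus increment on the i-th outgoing edge of v.
  std::vector<std::vector<unsigned>> weight(n);

  // Walk the nodes in reverse topological order (here simply 3, 2, 1, 0).
  for (int v = n - 1; v >= 0; --v) {
    if (v == exitNode) { numPaths[v] = 1; continue; }
    unsigned sum = 0;
    for (int s : succ[v]) {
      weight[v].push_back(sum); // increment carried by edge v -> s
      sum += numPaths[s];
    }
    numPaths[v] = sum;
  }

  // Decode every path number back into a node sequence with the same rule
  // getNextEdge uses: take the heaviest edge whose weight is <= what is left.
  for (unsigned p = 0; p < numPaths[0]; ++p) {
    unsigned rem = p;
    int v = 0;
    std::printf("path %u:", p);
    while (v != exitNode) {
      int best = -1;
      for (int i = 0; i < (int)succ[v].size(); ++i)
        if (weight[v][i] <= rem && (best < 0 || weight[v][i] > weight[v][best]))
          best = i;
      rem -= weight[v][best];
      std::printf(" %d ->", v);
      v = succ[v][best];
    }
    std::printf(" %d\n", v);
  }
  return 0;
}
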
diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp
deleted file mode 100644
index bc53221..0000000
--- a/lib/Analysis/PathProfileInfo.cpp
+++ /dev/null
@@ -1,433 +0,0 @@
-//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface used by optimizers to load path profiles,
-// and provides a loader pass which reads a path profile file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-info"
-
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-
-using namespace llvm;
-
-// command line option for loading path profiles
-static cl::opt<std::string>
-PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
-
-namespace {
- class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
- public:
- PathProfileLoaderPass() : ModulePass(ID) { }
- ~PathProfileLoaderPass();
-
- // this pass doesn't change anything (only loads information)
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- // the full name of the loader pass
- virtual const char* getPassName() const {
- return "Path Profiling Information Loader";
- }
-
- // required since this pass implements multiple inheritance
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &PathProfileInfo::ID)
- return (PathProfileInfo*)this;
- return this;
- }
-
- // entry point to run the pass
- bool runOnModule(Module &M);
-
- // pass identification
- static char ID;
-
- private:
- // make a reference table to refer to functions by number
- void buildFunctionRefs(Module &M);
-
- // process argument info of a program from the input file
- void handleArgumentInfo();
-
- // process path number information from the input file
- void handlePathInfo();
-
- // array of references to the functions in the module
- std::vector<Function*> _functions;
-
- // path profile file handle
- FILE* _file;
-
- // path profile file name
- std::string _filename;
- };
-}
-
-// register PathLoader
-char PathProfileLoaderPass::ID = 0;
-
-INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
- NoPathProfileInfo)
-INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
- "path-profile-loader",
- "Load path profile information from file",
- false, true, false)
-
-char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
-
-// link PathLoader as a pass, and make it available as an optimisation
-ModulePass *llvm::createPathProfileLoaderPass() {
- return new PathProfileLoaderPass;
-}
-
-// ----------------------------------------------------------------------------
-// PathEdge implementation
-//
-ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
- unsigned duplicateNumber)
- : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
-
-// ----------------------------------------------------------------------------
-// Path implementation
-//
-
-ProfilePath::ProfilePath (unsigned int number, unsigned int count,
- double countStdDev, PathProfileInfo* ppi)
- : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
-
-double ProfilePath::getFrequency() const {
- return 100 * double(_count) /
- double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
-}
-
-static BallLarusEdge* getNextEdge (BallLarusNode* node,
- unsigned int pathNumber) {
- BallLarusEdge* best = 0;
-
- for( BLEdgeIterator next = node->succBegin(),
- end = node->succEnd(); next != end; next++ ) {
- if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
- (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
- (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
- (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
- best = *next;
- }
-
- return best;
-}
-
-ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
- BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
- unsigned int increment = _number;
- ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
-
- while (currentNode != _ppi->_currentDag->getExit()) {
- BallLarusEdge* next = getNextEdge(currentNode, increment);
-
- increment -= next->getWeight();
-
- if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
- next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
- next->getTarget() != _ppi->_currentDag->getExit() )
- pev->push_back(ProfilePathEdge(
- next->getSource()->getBlock(),
- next->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
- next->getTarget() == _ppi->_currentDag->getExit() )
- pev->push_back(ProfilePathEdge(
- next->getRealEdge()->getSource()->getBlock(),
- next->getRealEdge()->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
- next->getSource() == _ppi->_currentDag->getRoot() )
- pev->push_back(ProfilePathEdge(
- next->getRealEdge()->getSource()->getBlock(),
- next->getRealEdge()->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- // set the new node
- currentNode = next->getTarget();
- }
-
- return pev;
-}
-
-ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
- BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
- unsigned int increment = _number;
- ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
-
- while (currentNode != _ppi->_currentDag->getExit()) {
- BallLarusEdge* next = getNextEdge(currentNode, increment);
- increment -= next->getWeight();
-
- // add block to the block list if it is a real edge
- if( next->getType() == BallLarusEdge::NORMAL)
- pbv->push_back (currentNode->getBlock());
- // make the back edge the last edge since we are at the end
- else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
- pbv->push_back (currentNode->getBlock());
- pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
- }
-
- // set the new node
- currentNode = next->getTarget();
- }
-
- return pbv;
-}
-
-BasicBlock* ProfilePath::getFirstBlockInPath() const {
- BallLarusNode* root = _ppi->_currentDag->getRoot();
- BallLarusEdge* edge = getNextEdge(root, _number);
-
- if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
- edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
- return edge->getTarget()->getBlock();
-
- return root->getBlock();
-}
-
-// ----------------------------------------------------------------------------
-// PathProfileInfo implementation
-//
-
-// Pass identification
-char llvm::PathProfileInfo::ID = 0;
-
-PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
-}
-
-PathProfileInfo::~PathProfileInfo() {
- if (_currentDag)
- delete _currentDag;
-}
-
-// set the function for which paths are currently being processed
-void PathProfileInfo::setCurrentFunction(Function* F) {
- // Make sure it exists
- if (!F) return;
-
- if (_currentDag)
- delete _currentDag;
-
- _currentFunction = F;
- _currentDag = new BallLarusDag(*F);
- _currentDag->init();
- _currentDag->calculatePathNumbers();
-}
-
-// get the function for which paths are currently being processed
-Function* PathProfileInfo::getCurrentFunction() const {
- return _currentFunction;
-}
-
-// get the entry block of the function
-BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
- return _currentDag->getRoot()->getBlock();
-}
-
-// return the path based on its number
-ProfilePath* PathProfileInfo::getPath(unsigned int number) {
- return _functionPaths[_currentFunction][number];
-}
-
-// return the number of paths which a function may potentially execute
-unsigned int PathProfileInfo::getPotentialPathCount() {
- return _currentDag ? _currentDag->getNumberOfPaths() : 0;
-}
-
-// return an iterator for the beginning of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathBegin() {
- return _functionPaths[_currentFunction].begin();
-}
-
-// return an iterator for the end of a functions executed paths
-ProfilePathIterator PathProfileInfo::pathEnd() {
- return _functionPaths[_currentFunction].end();
-}
-
-// returns the total number of paths run in the function
-unsigned int PathProfileInfo::pathsRun() {
- return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
-}
-
-// ----------------------------------------------------------------------------
-// PathLoader implementation
-//
-
-// remove all generated paths
-PathProfileLoaderPass::~PathProfileLoaderPass() {
- for( FunctionPathIterator funcNext = _functionPaths.begin(),
- funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
- for( ProfilePathIterator pathNext = funcNext->second.begin(),
- pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
- delete pathNext->second;
-}
-
-// entry point of the pass; this loads and parses a file
-bool PathProfileLoaderPass::runOnModule(Module &M) {
- // get the filename and setup the module's function references
- _filename = PathProfileInfoFilename;
- buildFunctionRefs (M);
-
- if (!(_file = fopen(_filename.c_str(), "rb"))) {
- errs () << "error: input '" << _filename << "' file does not exist.\n";
- return false;
- }
-
- ProfilingType profType;
-
- while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
- switch (profType) {
- case ArgumentInfo:
- handleArgumentInfo ();
- break;
- case PathInfo:
- handlePathInfo ();
- break;
- default:
- errs () << "error: bad path profiling file syntax, " << profType << "\n";
- fclose (_file);
- return false;
- }
- }
-
- fclose (_file);
-
- return true;
-}
-
-// create a reference table for functions defined in the path profile file
-void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
- _functions.push_back(0); // make the 0 index a null pointer
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
- if (F->isDeclaration())
- continue;
- _functions.push_back(F);
- }
-}
-
-// handle command line argument info in the output file
-void PathProfileLoaderPass::handleArgumentInfo() {
- // get the argument list's length
- unsigned savedArgsLength;
- if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
- errs() << "warning: argument info header/data mismatch\n";
- return;
- }
-
- // allocate a buffer, and get the arguments
- char* args = new char[savedArgsLength+1];
- if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
- errs() << "warning: argument info header/data mismatch\n";
-
- args[savedArgsLength] = '\0';
- argList = std::string(args);
- delete [] args; // cleanup dynamic string
-
- // byte alignment
- if (savedArgsLength & 3)
- fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
-}
-
-// Handle path profile information in the output file
-void PathProfileLoaderPass::handlePathInfo () {
- // get the number of functions in this profile
- unsigned functionCount;
- if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
- errs() << "warning: path info header/data mismatch\n";
- return;
- }
-
- // gather path information for each function
- for (unsigned i = 0; i < functionCount; i++) {
- PathProfileHeader pathHeader;
- if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
- errs() << "warning: bad header for path function info\n";
- break;
- }
-
- Function* f = _functions[pathHeader.fnNumber];
-
- // dynamically allocate a table to store path numbers
- PathProfileTableEntry* pathTable =
- new PathProfileTableEntry[pathHeader.numEntries];
-
- if( fread(pathTable, sizeof(PathProfileTableEntry),
- pathHeader.numEntries, _file) != pathHeader.numEntries) {
- delete [] pathTable;
- errs() << "warning: path function info header/data mismatch\n";
- return;
- }
-
- // Build a new path for the current function
- unsigned int totalPaths = 0;
- for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
- totalPaths += pathTable[j].pathCounter;
- _functionPaths[f][pathTable[j].pathNumber]
- = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
- 0, this);
- }
-
- _functionPathCounts[f] = totalPaths;
-
- delete [] pathTable;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// NoProfile PathProfileInfo implementation
-//
-
-namespace {
- struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
- static char ID; // Class identification, replacement for typeinfo
- NoPathProfileInfo() : ImmutablePass(ID) {
- initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &PathProfileInfo::ID)
- return (PathProfileInfo*)this;
- return this;
- }
-
- virtual const char *getPassName() const {
- return "NoPathProfileInfo";
- }
- };
-} // End of anonymous namespace
-
-char NoPathProfileInfo::ID = 0;
-// Register this pass...
-INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
- "No Path Profile Information", false, true, true)
-
-ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
deleted file mode 100644
index 48d7d05..0000000
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This verifier derives an edge profile file from current path profile
-// information
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-verifier"
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <stdio.h>
-
-using namespace llvm;
-
-namespace {
- class PathProfileVerifier : public ModulePass {
- private:
- bool runOnModule(Module &M);
-
- public:
- static char ID; // Pass identification, replacement for typeid
- PathProfileVerifier() : ModulePass(ID) {
- initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
- }
-
-
- virtual const char *getPassName() const {
- return "Path Profiler Verifier";
- }
-
- // The verifier requires the path profile and edge profile.
- virtual void getAnalysisUsage(AnalysisUsage& AU) const;
- };
-}
-
-static cl::opt<std::string>
-EdgeProfileFilename("path-profile-verifier-file",
- cl::init("edgefrompath.llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Edge profile file generated by -path-profile-verifier"),
- cl::Hidden);
-
-char PathProfileVerifier::ID = 0;
-INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
- "Compare the path profile derived edge profile against the "
- "edge profile.", true, true)
-
-ModulePass *llvm::createPathProfileVerifierPass() {
- return new PathProfileVerifier();
-}
-
-// The verifier requires the path profile and edge profile.
-void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
- AU.addRequired<PathProfileInfo>();
- AU.addPreserved<PathProfileInfo>();
-}
-
-typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
-typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
-typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
-
-// the verifier iterates through each path to gather the total
-// number of edge frequencies
-bool PathProfileVerifier::runOnModule (Module &M) {
- PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
-
- // set up a data structure that maps path edges to indices into an
- // array of edge counters
- NestedBlockToIndexMap arrayMap;
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
-
- arrayMap[(BasicBlock*)0][F->begin()][0] = i++;
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
-
- unsigned duplicate = 0;
- BasicBlock* prev = 0;
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
- prev = TI->getSuccessor(s), ++s) {
- if (prev == TI->getSuccessor(s))
- duplicate++;
- else duplicate = 0;
-
- arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
- }
- }
- }
-
- std::vector<unsigned> edgeArray(i);
-
- // iterate through each path and increment the edge counters as needed
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
-
- pathProfileInfo.setCurrentFunction(F);
-
- DEBUG(dbgs() << "function '" << F->getName() << "' ran "
- << pathProfileInfo.pathsRun()
- << "/" << pathProfileInfo.getPotentialPathCount()
- << " potential paths\n");
-
- for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
- endPath = pathProfileInfo.pathEnd();
- nextPath != endPath; nextPath++ ) {
- ProfilePath* currentPath = nextPath->second;
-
- ProfilePathEdgeVector* pev = currentPath->getPathEdges();
- DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
- << currentPath->getCount() << "\n");
- // setup the entry edge (normally path profiling doesn't care about this)
- if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
- edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]]
- += currentPath->getCount();
-
- for( ProfilePathEdgeIterator nextEdge = pev->begin(),
- endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
- if (nextEdge != pev->begin())
- DEBUG(dbgs() << " :: ");
-
- BasicBlock* source = nextEdge->getSource();
- BasicBlock* target = nextEdge->getTarget();
- unsigned duplicateNumber = nextEdge->getDuplicateNumber();
- DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber
- << "}--> " << target->getName());
-
- // Ensure all the referenced edges exist
- // TODO: make this a separate function
- if( !arrayMap.count(source) ) {
- errs() << " error [" << F->getName() << "()]: source '"
- << source->getName()
- << "' does not exist in the array map.\n";
- } else if( !arrayMap[source].count(target) ) {
- errs() << " error [" << F->getName() << "()]: target '"
- << target->getName()
- << "' does not exist in the array map.\n";
- } else if( !arrayMap[source][target].count(duplicateNumber) ) {
- errs() << " error [" << F->getName() << "()]: edge "
- << source->getName() << " -> " << target->getName()
- << " duplicate number " << duplicateNumber
- << " does not exist in the array map.\n";
- } else {
- edgeArray[arrayMap[source][target][duplicateNumber]]
- += currentPath->getCount();
- }
- }
-
- DEBUG(errs() << "\n");
-
- delete pev;
- }
- }
-
- std::string errorInfo;
- std::string filename = EdgeProfileFilename;
-
- // Open a handle to the file
- FILE* edgeFile = fopen(filename.c_str(),"wb");
-
- if (!edgeFile) {
- errs() << "error: unable to open file '" << filename << "' for output.\n";
- return false;
- }
-
- errs() << "Generating edge profile '" << filename << "' ...\n";
-
- // write argument info
- unsigned type = ArgumentInfo;
- unsigned num = pathProfileInfo.argList.size();
- int zeros = 0;
-
- fwrite(&type,sizeof(unsigned),1,edgeFile);
- fwrite(&num,sizeof(unsigned),1,edgeFile);
- fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
- if (num&3)
- fwrite(&zeros, 1, 4-(num&3), edgeFile);
-
- type = EdgeInfo;
- num = edgeArray.size();
- fwrite(&type,sizeof(unsigned),1,edgeFile);
- fwrite(&num,sizeof(unsigned),1,edgeFile);
-
- // write each edge to the file
- for( std::vector<unsigned>::iterator s = edgeArray.begin(),
- e = edgeArray.end(); s != e; s++)
- fwrite(&*s, sizeof (unsigned), 1, edgeFile);
-
- fclose (edgeFile);
-
- return true;
-}
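
The verifier's nested arrayMap gives every static CFG edge, including repeated successors of the same terminator, its own counter slot. The sketch below reproduces that (source, target, duplicate) indexing with block names standing in for BasicBlock pointers; the example graph is an illustrative assumption.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Successor lists per block; the repeated entry models a switch with two
  // cases branching to the same block.
  std::map<std::string, std::vector<std::string>> succs = {
      {"entry", {"then", "else"}},
      {"then", {"merge"}},
      {"else", {"merge", "merge"}}, // duplicate edge else -> merge
      {"merge", {}}};

  std::map<std::string,
           std::map<std::string, std::map<unsigned, unsigned>>> slot;
  unsigned counter = 0;
  for (const auto &b : succs) {
    unsigned dup = 0;
    std::string prev;
    for (const auto &s : b.second) {
      dup = (s == prev) ? dup + 1 : 0; // same duplicate-numbering rule as above
      prev = s;
      slot[b.first][s][dup] = counter++;
    }
  }

  // Every static edge, including the duplicate, now owns one counter index.
  for (const auto &b : slot)
    for (const auto &t : b.second)
      for (const auto &d : t.second)
        std::printf("%s -> %s (dup %u): slot %u\n", b.first.c_str(),
                    t.first.c_str(), d.first, d.second);
  return 0;
}
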
diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp
deleted file mode 100644
index 3d0a1e2..0000000
--- a/lib/Analysis/ProfileDataLoader.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- ProfileDataLoader.cpp - Load profile information from disk ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ProfileDataLoader class is used to load raw profiling data from the dump
-// file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Analysis/ProfileDataTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-raw_ostream &llvm::operator<<(raw_ostream &O, std::pair<const BasicBlock *,
- const BasicBlock *> E) {
- O << "(";
-
- if (E.first)
- O << E.first->getName();
- else
- O << "0";
-
- O << ",";
-
- if (E.second)
- O << E.second->getName();
- else
- O << "0";
-
- return O << ")";
-}
-
-/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one
-/// (or both) may not be defined.
-static unsigned AddCounts(unsigned A, unsigned B) {
- // If either value is undefined, use the other.
- // Undefined + undefined = undefined.
- if (A == ProfileDataLoader::Uncounted) return B;
- if (B == ProfileDataLoader::Uncounted) return A;
-
- return A + B;
-}
-
-/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F'
-template <typename T>
-static void ReadProfilingData(const char *ToolName, FILE *F,
- T *Data, size_t NumEntries) {
- // Read in the block of data...
- if (fread(Data, sizeof(T), NumEntries, F) != NumEntries)
- report_fatal_error(Twine(ToolName) + ": Profiling data truncated");
-}
-
-/// ReadProfilingNumEntries - Read how many entries are in this profiling data
-/// packet.
-static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F,
- bool ShouldByteSwap) {
- unsigned Entry;
- ReadProfilingData<unsigned>(ToolName, F, &Entry, 1);
- return ShouldByteSwap ? ByteSwap_32(Entry) : Entry;
-}
-
-/// ReadProfilingBlock - Read the number of entries in the next profiling data
-/// packet and then accumulate the entries into 'Data'.
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- SmallVectorImpl<unsigned> &Data) {
- // Read the number of entries...
- unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
-
- // Read in the data.
- SmallVector<unsigned, 8> TempSpace(NumEntries);
- ReadProfilingData<unsigned>(ToolName, F, TempSpace.data(), NumEntries);
-
- // Make sure we have enough space ...
- if (Data.size() < NumEntries)
- Data.resize(NumEntries, ProfileDataLoader::Uncounted);
-
- // Accumulate the data we just read into the existing data.
- for (unsigned i = 0; i < NumEntries; ++i) {
- unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i];
- Data[i] = AddCounts(Entry, Data[i]);
- }
-}
-
-/// ReadProfilingArgBlock - Read the command line arguments that the program was
-/// run with when the current profiling data packet(s) were generated.
-static void ReadProfilingArgBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- SmallVectorImpl<std::string> &CommandLines) {
- // Read the number of bytes ...
- unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
-
- // Read in the arguments (if there are any to read). Round up the length to
- // the nearest 4-byte multiple.
- SmallVector<char, 8> Args(ArgLength+4);
- if (ArgLength)
- ReadProfilingData<char>(ToolName, F, Args.data(), (ArgLength+3) & ~3);
-
- // Store the arguments.
- CommandLines.push_back(std::string(&Args[0], &Args[ArgLength]));
-}
-
-const unsigned ProfileDataLoader::Uncounted = ~0U;
-
-/// ProfileDataLoader ctor - Read the specified profiling data file, reporting
-/// a fatal error if the file is invalid or broken.
-ProfileDataLoader::ProfileDataLoader(const char *ToolName,
- const std::string &Filename)
- : Filename(Filename) {
- FILE *F = fopen(Filename.c_str(), "rb");
- if (F == 0)
- report_fatal_error(Twine(ToolName) + ": Error opening '" +
- Filename + "': ");
-
- // Keep reading packets until we run out of them.
- unsigned PacketType;
- while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
- // If the low eight bits of the packet are zero, we must be dealing with an
- // endianness mismatch. Byteswap all words read from the profiling
- // information. This can happen when the compiler host and target have
- // different endianness.
- bool ShouldByteSwap = (char)PacketType == 0;
- PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType;
-
- switch (PacketType) {
- case ArgumentInfo:
- ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines);
- break;
-
- case EdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
- break;
-
- default:
- report_fatal_error(std::string(ToolName)
- + ": Unknown profiling packet type");
- break;
- }
- }
-
- fclose(F);
-}
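
The constructor above relies on a simple heuristic: packet type tags are small values, so a zero low byte in a freshly read word means the profile was written on a machine with the opposite byte order and every word must be swapped. A small standalone illustration, with swap32 as a plain C++ stand-in for ByteSwap_32 and a made-up sample word:

#include <cstdint>
#include <cstdio>

// Plain stand-in for the 32-bit byte swap used by the loader.
static uint32_t swap32(uint32_t V) {
  return (V << 24) | ((V & 0xff00u) << 8) | ((V >> 8) & 0xff00u) | (V >> 24);
}

int main() {
  // Pretend this word was fread() from the profile file: a writer on an
  // opposite-endian machine storing a small tag value of 1 shows up here
  // as 0x01000000.
  uint32_t PacketType = 0x01000000;

  // Tags are small, so a zero low byte signals a byte-order mismatch.
  bool ShouldByteSwap = (char)PacketType == 0;
  if (ShouldByteSwap)
    PacketType = swap32(PacketType);

  std::printf("swap needed: %s, packet type: %u\n",
              ShouldByteSwap ? "yes" : "no", PacketType);
  return 0;
}
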
diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp
deleted file mode 100644
index 2ee0093..0000000
--- a/lib/Analysis/ProfileDataLoaderPass.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass loads profiling data from a dump file and sets branch weight
-// metadata.
-//
-// TODO: Replace all "profile-metadata-loader" strings with "profile-loader"
-// once ProfileInfo etc. has been removed.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-metadata-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated.");
-
-static cl::opt<std::string>
-ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Profile file loaded by -profile-metadata-loader"));
-
-namespace {
- /// This pass loads profiling data from a dump file and sets branch weight
- /// metadata.
- class ProfileMetadataLoaderPass : public ModulePass {
- std::string Filename;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit ProfileMetadataLoaderPass(const std::string &filename = "")
- : ModulePass(ID), Filename(filename) {
- initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry());
- if (filename.empty()) Filename = ProfileMetadataFilename;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- virtual const char *getPassName() const {
- return "Profile loader";
- }
-
- virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge,
- ArrayRef<unsigned>);
- virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>);
- virtual void setBranchWeightMetadata(Module&, ProfileData&);
-
- virtual bool runOnModule(Module &M);
- };
-} // End of anonymous namespace
-
-char ProfileMetadataLoaderPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader",
- "Load profile information from llvmprof.out", false, true)
-INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader",
- "Load profile information from llvmprof.out", false, true)
-
-char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID;
-
-/// createProfileMetadataLoaderPass - This function returns a Pass that loads
-/// the profiling information for the module from the specified filename,
-/// making it available to the optimizers.
-ModulePass *llvm::createProfileMetadataLoaderPass() {
- return new ProfileMetadataLoaderPass();
-}
-ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) {
- return new ProfileMetadataLoaderPass(Filename);
-}
-
-/// readEdge - Take the value from a profile counter and assign it to an edge.
-void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount,
- ProfileData &PB, ProfileData::Edge e,
- ArrayRef<unsigned> Counters) {
- if (ReadCount >= Counters.size()) return;
-
- unsigned weight = Counters[ReadCount];
- assert(weight != ProfileDataLoader::Uncounted);
- PB.addEdgeWeight(e, weight);
-
- DEBUG(dbgs() << "-- Read Edge Counter for " << e
- << " (# "<< (ReadCount) << "): "
- << PB.getEdgeWeight(e) << "\n");
-}
-
-/// matchEdges - Link every profile counter with an edge.
-unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB,
- ArrayRef<unsigned> Counters) {
- if (Counters.size() == 0) return 0;
-
- unsigned ReadCount = 0;
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n");
- readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)),
- Counters);
- }
- }
- }
-
- return ReadCount;
-}
-
-/// setBranchWeightMetadata - Translate the counter values associated with each
-/// edge into branch weights for each conditional branch (a branch with 2 or
-/// more destinations).
-void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M,
- ProfileData &PB) {
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n");
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- unsigned NumSuccessors = TI->getNumSuccessors();
-
- // If there is only one successor then we cannot set a branch
- // probability as the target is certain.
- if (NumSuccessors < 2) continue;
-
- // Load the weights of all edges leading from this terminator.
- DEBUG(dbgs() << "-- Terminator with " << NumSuccessors
- << " successors:\n");
- SmallVector<uint32_t, 4> Weights(NumSuccessors);
- for (unsigned s = 0 ; s < NumSuccessors ; ++s) {
- ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s));
- Weights[s] = (uint32_t)PB.getEdgeWeight(edge);
- DEBUG(dbgs() << "---- Edge '" << edge << "' has weight "
- << Weights[s] << "\n");
- }
-
- // Set branch weight metadata. This will set branch probabilities of
- // 100%/0% if that is true of the dynamic execution.
- // BranchProbabilityInfo can account for this when it loads this metadata
- // (it gives the unexecuted branch a weight of 1 for the purposes of
- // probability calculations).
- MDBuilder MDB(TI->getContext());
- MDNode *Node = MDB.createBranchWeights(Weights);
- TI->setMetadata(LLVMContext::MD_prof, Node);
- NumTermsAnnotated++;
- }
- }
-}
-
-bool ProfileMetadataLoaderPass::runOnModule(Module &M) {
- ProfileDataLoader PDL("profile-data-loader", Filename);
- ProfileData PB;
-
- ArrayRef<unsigned> Counters = PDL.getRawEdgeCounts();
-
- unsigned ReadCount = matchEdges(M, PB, Counters);
-
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
-
- setBranchWeightMetadata(M, PB);
-
- return ReadCount > 0;
-}
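
The heart of setBranchWeightMetadata is the MDBuilder call that turns per-edge counters into !prof branch_weights metadata. The sketch below distills that step using the same LLVM 3.4-era headers as the pass; the helper name annotateTerminator is an illustrative assumption.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

// Attach branch_weights metadata to a conditional terminator, one weight per
// successor, mirroring ProfileMetadataLoaderPass::setBranchWeightMetadata.
static void annotateTerminator(TerminatorInst *TI,
                               const SmallVectorImpl<uint32_t> &Weights) {
  if (TI->getNumSuccessors() < 2)
    return; // a single successor is taken unconditionally; nothing to encode
  MDBuilder MDB(TI->getContext());
  MDNode *Node = MDB.createBranchWeights(Weights);
  TI->setMetadata(LLVMContext::MD_prof, Node);
}
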
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
deleted file mode 100644
index 365b64c..0000000
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ /dev/null
@@ -1,426 +0,0 @@
-//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete implementation of profiling information that
-// estimates the profiling information in a very crude and unimaginative way.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-estimator"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-static cl::opt<double>
-LoopWeight(
- "profile-estimator-loop-weight", cl::init(10),
- cl::value_desc("loop-weight"),
- cl::desc("Number of loop executions used for profile-estimator")
-);
-
-namespace {
- class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
- double ExecCount;
- LoopInfo *LI;
- std::set<BasicBlock*> BBToVisit;
- std::map<Loop*,double> LoopExitWeights;
- std::map<Edge,double> MinimalWeight;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit ProfileEstimatorPass(const double execcount = 0)
- : FunctionPass(ID), ExecCount(execcount) {
- initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
- if (execcount == 0) ExecCount = LoopWeight;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<LoopInfo>();
- }
-
- virtual const char *getPassName() const {
- return "Profiling information estimator";
- }
-
- /// run - Estimate the profile information from the specified file.
- virtual bool runOnFunction(Function &F);
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- virtual void recurseBasicBlock(BasicBlock *BB);
-
- void inline printEdgeWeight(Edge);
- };
-} // End of anonymous namespace
-
-char ProfileEstimatorPass::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false)
-
-namespace llvm {
- char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
-
- FunctionPass *createProfileEstimatorPass() {
- return new ProfileEstimatorPass();
- }
-
- /// createProfileEstimatorPass - This function returns a Pass that estimates
- /// profiling information using the given loop execution count.
- Pass *createProfileEstimatorPass(const unsigned execcount) {
- return new ProfileEstimatorPass(execcount);
- }
-}
-
-static double ignoreMissing(double w) {
- if (w == ProfileInfo::MissingValue) return 0;
- return w;
-}
-
-static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
- DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
-}
-
-void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
- DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
- << format("%20.20g", getEdgeWeight(E)) << "\n");
-}
-
-// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
-// single block and then recurses into the successors.
-// The algorithm preserves the flow condition, meaning that the sum of the
-// weight of the incoming edges must equal the block weight, which must in
-// turn be equal to the sum of the weights of the outgoing edges.
-// Since the flow of a block is determined from the current state of the
-// flow, once an edge has a flow assigned, this flow is never changed again;
-// otherwise it would be possible to violate the flow condition in another
-// block.
-void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
-
- // Break the recursion if this BasicBlock was already visited.
- if (BBToVisit.find(BB) == BBToVisit.end()) return;
-
- // Read the LoopInfo for this block.
- bool BBisHeader = LI->isLoopHeader(BB);
- Loop* BBLoop = LI->getLoopFor(BB);
-
- // To get the block weight, read all incoming edges.
- double BBWeight = 0;
- std::set<BasicBlock*> ProcessedPreds;
- for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- // If this block was not considered already, add weight.
- Edge edge = getEdge(*bbi,BB);
- double w = getEdgeWeight(edge);
- if (ProcessedPreds.insert(*bbi).second) {
- BBWeight += ignoreMissing(w);
- }
- // If this block is a loop header and the predecessor is contained in this
- // loop (i.e., the edge is a backedge), continue and do not check whether the
- // value is valid.
- if (BBisHeader && BBLoop->contains(*bbi)) {
- printEdgeError(edge, "but is backedge, continuing");
- continue;
- }
- // If the edge's value is missing (and this is not a loop header and not a
- // backedge), return; this block cannot be estimated yet.
- if (w == MissingValue) {
- printEdgeError(edge, "returning");
- return;
- }
- }
- if (getExecutionCount(BB) != MissingValue) {
- BBWeight = getExecutionCount(BB);
- }
-
- // Fetch all necessary information for current block.
- SmallVector<Edge, 8> ExitEdges;
- SmallVector<Edge, 8> Edges;
- if (BBLoop) {
- BBLoop->getExitEdges(ExitEdges);
- }
-
- // If this is a loop header, consider the following:
- // Exactly the flow that is entering this block must exit this block too. So
- // do the following:
- // *) Get all the exit edges, read the flow that is already leaving this
- // loop, and remember the edges that do not have any flow on them right now.
- // (The edges that already have flow on them are most likely exiting edges of
- // other loops; do not touch those flows because the previously calculated
- // loop headers would not be exact anymore.)
- // *) In case there is not a single exiting edge left, create one at the loop
- // latch to prevent the flow from building up in the loop.
- // *) Take the flow that is not leaving the loop already and distribute it on
- // the remaining exiting edges.
- // (This ensures that all flow that enters the loop also leaves it.)
- // *) Increase the flow into the loop by increasing the weight of this block.
- // There is at least one incoming backedge that will bring us this flow later
- // on. (So that the flow condition in this node is valid again.)
- if (BBisHeader) {
- double incoming = BBWeight;
- // Subtract the flow leaving the loop.
- std::set<Edge> ProcessedExits;
- for (SmallVectorImpl<Edge>::iterator ei = ExitEdges.begin(),
- ee = ExitEdges.end(); ei != ee; ++ei) {
- if (ProcessedExits.insert(*ei).second) {
- double w = getEdgeWeight(*ei);
- if (w == MissingValue) {
- Edges.push_back(*ei);
- // Check if there is a necessary minimal weight, if yes, subtract it
- // from weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- incoming -= MinimalWeight[*ei];
- DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- } else {
- incoming -= w;
- }
- }
- }
- // If no exit edges, create one:
- if (Edges.size() == 0) {
- BasicBlock *Latch = BBLoop->getLoopLatch();
- if (Latch) {
- Edge edge = getEdge(Latch,0);
- EdgeInformation[BB->getParent()][edge] = BBWeight;
- printEdgeWeight(edge);
- edge = getEdge(Latch, BB);
- EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
- printEdgeWeight(edge);
- }
- }
-
- // Distribute the remaining weight to the exiting edges. To prevent fractions
- // from building up and provoking precision problems, the weight to be
- // distributed is split and rounded; the last edge gets a somewhat
- // bigger value, but we are close enough for an estimation.
- double fraction = floor(incoming/Edges.size());
- for (SmallVectorImpl<Edge>::iterator ei = Edges.begin(), ee = Edges.end();
- ei != ee; ++ei) {
- double w = 0;
- if (ei != (ee-1)) {
- w = fraction;
- incoming -= fraction;
- } else {
- w = incoming;
- }
- EdgeInformation[BB->getParent()][*ei] += w;
- // Read necessary minimal weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
- DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- printEdgeWeight(*ei);
-
- // Add a minimal weight to the paths to all exit edges; this is used to
- // ensure that enough flow reaches these edges.
- Path p;
- const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
- while (Dest != BB) {
- const BasicBlock *Parent = p.find(Dest)->second;
- Edge e = getEdge(Parent, Dest);
- if (MinimalWeight.find(e) == MinimalWeight.end()) {
- MinimalWeight[e] = 0;
- }
- MinimalWeight[e] += w;
- DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
- Dest = Parent;
- }
- }
- // Increase flow into the loop.
- BBWeight *= (ExecCount+1);
- }
-
- BlockInformation[BB->getParent()][BB] = BBWeight;
- // Up until now we considered only the loop exiting edges; now we have a
- // definite block weight and must distribute it onto the outgoing edges.
- // Since there may already be flow attached to some of the edges, read this
- // flow first and remember the edges that still have no flow attached.
- Edges.clear();
- std::set<BasicBlock*> ProcessedSuccs;
-
- succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- // Also check for (BB,0) edges that may already contain some flow. (But only
- // in case there are no successors.)
- if (bbi == bbe) {
- Edge edge = getEdge(BB,0);
- EdgeInformation[BB->getParent()][edge] = BBWeight;
- printEdgeWeight(edge);
- }
- for ( ; bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- Edge edge = getEdge(BB,*bbi);
- double w = getEdgeWeight(edge);
- if (w != MissingValue) {
- BBWeight -= getEdgeWeight(edge);
- } else {
- Edges.push_back(edge);
- // If a minimal weight is necessary, reserve it by subtracting it from the
- // block weight; it is re-added later on.
- if (MinimalWeight.find(edge) != MinimalWeight.end()) {
- BBWeight -= MinimalWeight[edge];
- DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
- }
- }
- }
- }
-
- double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;
- // Finally we know what flow is still not leaving the block; distribute this
- // flow onto the empty edges.
- for (SmallVectorImpl<Edge>::iterator ei = Edges.begin(), ee = Edges.end();
- ei != ee; ++ei) {
- if (ei != (ee-1)) {
- EdgeInformation[BB->getParent()][*ei] += fraction;
- BBWeight -= fraction;
- } else {
- EdgeInformation[BB->getParent()][*ei] += BBWeight;
- }
- // Re-add the minimal necessary weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
- DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- printEdgeWeight(*ei);
- }
-
- // This block is visited, mark this before the recursion.
- BBToVisit.erase(BB);
-
- // Recurse into successors.
- for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
-}
-
-bool ProfileEstimatorPass::runOnFunction(Function &F) {
- if (F.isDeclaration()) return false;
-
- // Fetch LoopInfo and clear ProfileInfo for this function.
- LI = &getAnalysis<LoopInfo>();
- FunctionInformation.erase(&F);
- BlockInformation[&F].clear();
- EdgeInformation[&F].clear();
- BBToVisit.clear();
-
- // Mark all blocks as to visit.
- for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
- BBToVisit.insert(bi);
-
- // Clear Minimal Edges.
- MinimalWeight.clear();
-
- DEBUG(dbgs() << "Working on function " << F.getName() << "\n");
-
- // Since the entry block is the first one and has no predecessors, the edge
- // (0,entry) is inserted with a large starting weight (2^32).
- BasicBlock *entry = &F.getEntryBlock();
- BlockInformation[&F][entry] = pow(2.0, 32.0);
- Edge edge = getEdge(0,entry);
- EdgeInformation[&F][edge] = BlockInformation[&F][entry];
- printEdgeWeight(edge);
-
- // Since recurseBasicBlock() may return with a block which was not fully
- // estimated, use recurseBasicBlock() until everything is calculated.
- bool cleanup = false;
- recurseBasicBlock(entry);
- while (BBToVisit.size() > 0 && !cleanup) {
- // Remember number of open blocks, this is later used to check if progress
- // was made.
- unsigned size = BBToVisit.size();
-
- // Try to calculate all blocks in turn.
- for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
- be = BBToVisit.end(); bi != be; ++bi) {
- recurseBasicBlock(*bi);
- // If at least one block was finished, break because iterator may be
- // invalid.
- if (BBToVisit.size() < size) break;
- }
-
- // If there was not a single block resolved, make some assumptions.
- if (BBToVisit.size() == size) {
- bool found = false;
- for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
- (BBI != BBE) && (!found); ++BBI) {
- BasicBlock *BB = *BBI;
- // Try each predecessor to see whether it can be assumed.
- for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- (bbi != bbe) && (!found); ++bbi) {
- Edge e = getEdge(*bbi,BB);
- double w = getEdgeWeight(e);
- // Check that edge from predecessor is still free.
- if (w == MissingValue) {
- // Check if there is a cycle from this block to the predecessor.
- Path P;
- const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
- if (Dest != *bbi) {
- // If there is no cycle, just set the edge weight to 0.
- EdgeInformation[&F][e] = 0;
- DEBUG(dbgs() << "Assuming edge weight: ");
- printEdgeWeight(e);
- found = true;
- }
- }
- }
- }
- if (!found) {
- cleanup = true;
- DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
- }
- }
- }
- // In case there was no safe way to assume edges, as a last measure
- // set _everything_ to zero.
- if (cleanup) {
- FunctionInformation[&F] = 0;
- BlockInformation[&F].clear();
- EdgeInformation[&F].clear();
- for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- const BasicBlock *BB = &(*FI);
- BlockInformation[&F][BB] = 0;
- const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
- if (predi == prede) {
- Edge e = getEdge(0,BB);
- setEdgeWeight(e,0);
- }
- for (;predi != prede; ++predi) {
- Edge e = getEdge(*predi,BB);
- setEdgeWeight(e,0);
- }
- succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
- if (succi == succe) {
- Edge e = getEdge(BB,0);
- setEdgeWeight(e,0);
- }
- for (;succi != succe; ++succi) {
- Edge e = getEdge(*succi,BB);
- setEdgeWeight(e,0);
- }
- }
- }
-
- return false;
-}
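
The estimator repeatedly splits a block's remaining weight over its outgoing edges, giving every edge but the last floor(weight/n) and letting the last edge absorb the remainder so that no flow is lost to rounding. A tiny standalone illustration of that rule, with made-up numbers:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  double BBWeight = 10.0;      // flow still to be distributed
  const unsigned NumEdges = 3; // edges with no flow assigned yet
  double fraction = std::floor(BBWeight / NumEdges);

  std::vector<double> edgeWeight(NumEdges, 0.0);
  for (unsigned i = 0; i < NumEdges; ++i) {
    if (i + 1 != NumEdges) {
      edgeWeight[i] += fraction; // every edge but the last: floor share
      BBWeight -= fraction;
    } else {
      edgeWeight[i] += BBWeight; // last edge absorbs the remainder
    }
  }

  // 10 over 3 edges -> 3, 3, 4; the sum still equals the block weight.
  for (unsigned i = 0; i < NumEdges; ++i)
    std::printf("edge %u: %g\n", i, edgeWeight[i]);
  return 0;
}
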
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
deleted file mode 100644
index 9626a48..0000000
--- a/lib/Analysis/ProfileInfo.cpp
+++ /dev/null
@@ -1,1079 +0,0 @@
-//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the abstract ProfileInfo interface, and the default
-// "no profile" implementation.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-info"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include <limits>
-#include <queue>
-#include <set>
-using namespace llvm;
-
-namespace llvm {
- template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
-}
-
-// Register the ProfileInfo interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
-
-namespace llvm {
-
-template <>
-ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
-template <>
-ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
-
-template <>
-ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
- MachineProfile = 0;
-}
-template <>
-ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
- if (MachineProfile) delete MachineProfile;
-}
-
-template<>
-char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
-
-template<>
-const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
-
-template<> const
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
-
-template<> double
-ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
- std::map<const Function*, BlockCounts>::iterator J =
- BlockInformation.find(BB->getParent());
- if (J != BlockInformation.end()) {
- BlockCounts::iterator I = J->second.find(BB);
- if (I != J->second.end())
- return I->second;
- }
-
- double Count = MissingValue;
-
- const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
- // Are there zero predecessors of this block?
- if (PI == PE) {
- Edge e = getEdge(0, BB);
- Count = getEdgeWeight(e);
- } else {
- // Otherwise, if there are predecessors, the execution count of this block is
- // the sum of the edge frequencies from the incoming edges.
- std::set<const BasicBlock*> ProcessedPreds;
- Count = 0;
- for (; PI != PE; ++PI) {
- const BasicBlock *P = *PI;
- if (ProcessedPreds.insert(P).second) {
- double w = getEdgeWeight(getEdge(P, BB));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
- }
- }
- }
-
- // If the predecessors did not suffice to get block weight, try successors.
- if (Count == MissingValue) {
-
- succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
-
- // Are there zero successors of this block?
- if (SI == SE) {
- Edge e = getEdge(BB,0);
- Count = getEdgeWeight(e);
- } else {
- std::set<const BasicBlock*> ProcessedSuccs;
- Count = 0;
- for (; SI != SE; ++SI)
- if (ProcessedSuccs.insert(*SI).second) {
- double w = getEdgeWeight(getEdge(BB, *SI));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
- }
- }
- }
-
- if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
- return Count;
-}
-
-template<>
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::
- getExecutionCount(const MachineBasicBlock *MBB) {
- std::map<const MachineFunction*, BlockCounts>::iterator J =
- BlockInformation.find(MBB->getParent());
- if (J != BlockInformation.end()) {
- BlockCounts::iterator I = J->second.find(MBB);
- if (I != J->second.end())
- return I->second;
- }
-
- return MissingValue;
-}
-
-template<>
-double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
- std::map<const Function*, double>::iterator J =
- FunctionInformation.find(F);
- if (J != FunctionInformation.end())
- return J->second;
-
- // isDeclaration() is checked here and not at the start of the function to
- // allow functions without a body to still have an execution count.
- if (F->isDeclaration()) return MissingValue;
-
- double Count = getExecutionCount(&F->getEntryBlock());
- if (Count != MissingValue) FunctionInformation[F] = Count;
- return Count;
-}
-
-template<>
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::
- getExecutionCount(const MachineFunction *MF) {
- std::map<const MachineFunction*, double>::iterator J =
- FunctionInformation.find(MF);
- if (J != FunctionInformation.end())
- return J->second;
-
- double Count = getExecutionCount(&MF->front());
- if (Count != MissingValue) FunctionInformation[MF] = Count;
- return Count;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- setExecutionCount(const BasicBlock *BB, double w) {
- DEBUG(dbgs() << "Creating Block " << BB->getName()
- << " (weight: " << format("%.20g",w) << ")\n");
- BlockInformation[BB->getParent()][BB] = w;
-}
-
-template<>
-void ProfileInfoT<MachineFunction, MachineBasicBlock>::
- setExecutionCount(const MachineBasicBlock *MBB, double w) {
- DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
- << " (weight: " << format("%.20g",w) << ")\n");
- BlockInformation[MBB->getParent()][MBB] = w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
- double oldw = getEdgeWeight(e);
- assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
- DEBUG(dbgs() << "Adding to Edge " << e
- << " (new weight: " << format("%.20g",oldw + w) << ")\n");
- EdgeInformation[getFunction(e)][e] = oldw + w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- addExecutionCount(const BasicBlock *BB, double w) {
- double oldw = getExecutionCount(BB);
- assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
- DEBUG(dbgs() << "Adding to Block " << BB->getName()
- << " (new weight: " << format("%.20g",oldw + w) << ")\n");
- BlockInformation[BB->getParent()][BB] = oldw + w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
- std::map<const Function*, BlockCounts>::iterator J =
- BlockInformation.find(BB->getParent());
- if (J == BlockInformation.end()) return;
-
- DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
- J->second.erase(BB);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(getFunction(e));
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Deleting" << e << "\n");
- J->second.erase(e);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- replaceEdge(const Edge &oldedge, const Edge &newedge) {
- double w;
- if ((w = getEdgeWeight(newedge)) == MissingValue) {
- w = getEdgeWeight(oldedge);
- DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
- } else {
- w += getEdgeWeight(oldedge);
- DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
- }
- setEdgeWeight(newedge,w);
- removeEdge(oldedge);
-}
-
-template<>
-const BasicBlock *ProfileInfoT<Function,BasicBlock>::
- GetPath(const BasicBlock *Src, const BasicBlock *Dest,
- Path &P, unsigned Mode) {
- const BasicBlock *BB = 0;
- bool hasFoundPath = false;
-
- std::queue<const BasicBlock *> BFS;
- BFS.push(Src);
-
- while(BFS.size() && !hasFoundPath) {
- BB = BFS.front();
- BFS.pop();
-
- succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
- if (Succ == End) {
- P[(const BasicBlock*)0] = BB;
- if (Mode & GetPathToExit) {
- hasFoundPath = true;
- BB = 0;
- }
- }
- for(;Succ != End; ++Succ) {
- if (P.find(*Succ) != P.end()) continue;
- Edge e = getEdge(BB,*Succ);
- if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
- P[*Succ] = BB;
- BFS.push(*Succ);
- if ((Mode & GetPathToDest) && *Succ == Dest) {
- hasFoundPath = true;
- BB = *Succ;
- break;
- }
- if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
- hasFoundPath = true;
- BB = *Succ;
- break;
- }
- }
- }
-
- return BB;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- divertFlow(const Edge &oldedge, const Edge &newedge) {
- DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
-
-  // First check if the old edge was taken; if not, just delete it...
- if (getEdgeWeight(oldedge) == 0) {
- removeEdge(oldedge);
- return;
- }
-
- Path P;
- P[newedge.first] = 0;
- P[newedge.second] = newedge.first;
- const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
-
- double w = getEdgeWeight (oldedge);
- DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
- do {
- const BasicBlock *Parent = P.find(BB)->second;
- Edge e = getEdge(Parent,BB);
- double oldw = getEdgeWeight(e);
- double oldc = getExecutionCount(e.first);
- setEdgeWeight(e, w+oldw);
- if (Parent != oldedge.first) {
- setExecutionCount(e.first, w+oldc);
- }
- BB = Parent;
- } while (BB != newedge.first);
- removeEdge(oldedge);
-}
-
-/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
-/// This checks all edges of the function the blocks reside in and replaces the
-/// occurrences of RmBB with DestBB.
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
- DEBUG(dbgs() << "Replacing " << RmBB->getName()
- << " with " << DestBB->getName() << "\n");
- const Function *F = DestBB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- Edge e, newedge;
- bool erasededge = false;
- EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
- while(I != E) {
- e = (I++)->first;
- bool foundedge = false; bool eraseedge = false;
- if (e.first == RmBB) {
- if (e.second == DestBB) {
- eraseedge = true;
- } else {
- newedge = getEdge(DestBB, e.second);
- foundedge = true;
- }
- }
- if (e.second == RmBB) {
- if (e.first == DestBB) {
- eraseedge = true;
- } else {
- newedge = getEdge(e.first, DestBB);
- foundedge = true;
- }
- }
- if (foundedge) {
- replaceEdge(e, newedge);
- }
- if (eraseedge) {
- if (erasededge) {
- Edge newedge = getEdge(DestBB, DestBB);
- replaceEdge(e, newedge);
- } else {
- removeEdge(e);
- erasededge = true;
- }
- }
- }
-}
-
-/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
-/// Since it's possible that there is more than one edge in the CFG from FirstBB
-/// to SecondBB, it's necessary to redirect the flow proportionally.
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
- const BasicBlock *SecondBB,
- const BasicBlock *NewBB,
- bool MergeIdenticalEdges) {
- const Function *F = FirstBB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- // Generate edges and read current weight.
- Edge e = getEdge(FirstBB, SecondBB);
- Edge n1 = getEdge(FirstBB, NewBB);
- Edge n2 = getEdge(NewBB, SecondBB);
- EdgeWeights &ECs = J->second;
- double w = ECs[e];
-
- int succ_count = 0;
- if (!MergeIdenticalEdges) {
-    // First count the edges from FirstBB to SecondBB; if there is more than
-    // one, only slice out a proportional part for NewBB.
- for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
- BBI != BBE; ++BBI) {
- if (*BBI == SecondBB) succ_count++;
- }
- // When the NewBB is completely new, increment the count by one so that
- // the counts are properly distributed.
- if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
- } else {
- // When the edges are merged anyway, then redirect all flow.
- succ_count = 1;
- }
-
-  // We now know how many edges there are from FirstBB to SecondBB; reroute a
-  // proportional part of the edge weight over NewBB.
- double neww = floor(w / succ_count);
- ECs[n1] += neww;
- ECs[n2] += neww;
- BlockInformation[F][NewBB] += neww;
- if (succ_count == 1) {
- ECs.erase(e);
- } else {
- ECs[e] -= neww;
- }
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
- const BasicBlock* New) {
- const Function *F = Old->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
-
- std::set<Edge> Edges;
- for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
- ewi != ewe; ++ewi) {
- Edge old = ewi->first;
- if (old.first == Old) {
- Edges.insert(old);
- }
- }
- for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
- EI != EE; ++EI) {
- Edge newedge = getEdge(New, EI->second);
- replaceEdge(*EI, newedge);
- }
-
- double w = getExecutionCount(Old);
- setEdgeWeight(getEdge(Old, New), w);
- setExecutionCount(New, w);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
- const BasicBlock* NewBB,
- BasicBlock *const *Preds,
- unsigned NumPreds) {
- const Function *F = BB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
- << " to " << NewBB->getName() << "\n");
-
- // Collect weight that was redirected over NewBB.
- double newweight = 0;
-
- std::set<const BasicBlock *> ProcessedPreds;
-  // For all requested predecessors.
- for (unsigned pred = 0; pred < NumPreds; ++pred) {
- const BasicBlock * Pred = Preds[pred];
- if (ProcessedPreds.insert(Pred).second) {
- // Create edges and read old weight.
- Edge oldedge = getEdge(Pred, BB);
- Edge newedge = getEdge(Pred, NewBB);
-
- // Remember how much weight was redirected.
- newweight += getEdgeWeight(oldedge);
-
- replaceEdge(oldedge,newedge);
- }
- }
-
- Edge newedge = getEdge(NewBB,BB);
- setEdgeWeight(newedge, newweight);
- setExecutionCount(NewBB, newweight);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
- const Function *New) {
- DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
- << New->getName() << "\n");
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(Old);
- if(J != EdgeInformation.end()) {
- EdgeInformation[New] = J->second;
- }
- EdgeInformation.erase(Old);
- BlockInformation.erase(Old);
- FunctionInformation.erase(Old);
-}
-
-static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
- ProfileInfo::Edge &tocalc, unsigned &uncalc) {
- if (w == ProfileInfo::MissingValue) {
- tocalc = edge;
- uncalc++;
- return 0;
- } else {
- return w;
- }
-}
-
-template<>
-bool ProfileInfoT<Function,BasicBlock>::
- CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
- bool assumeEmptySelf) {
- Edge edgetocalc;
- unsigned uncalculated = 0;
-
-  // Collect weights of all incoming and outgoing edges; remember edges that
-  // have no value.
- double incount = 0;
- SmallSet<const BasicBlock*,8> pred_visited;
- const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi==bbe) {
- Edge e = getEdge(0,BB);
- incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
- }
- for (;bbi != bbe; ++bbi) {
- if (pred_visited.insert(*bbi)) {
- Edge e = getEdge(*bbi,BB);
- incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
- }
- }
-
- double outcount = 0;
- SmallSet<const BasicBlock*,8> succ_visited;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi==sbbe) {
- Edge e = getEdge(BB,0);
- if (getEdgeWeight(e) == MissingValue) {
- double w = getExecutionCount(BB);
- if (w != MissingValue) {
- setEdgeWeight(e,w);
- removed = e;
- }
- }
- outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
- }
- for (;sbbi != sbbe; ++sbbi) {
- if (succ_visited.insert(*sbbi)) {
- Edge e = getEdge(BB,*sbbi);
- outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
- }
- }
-
-  // If exactly one edge weight was missing, calculate it and remove it from
-  // the spanning tree.
- if (uncalculated == 0 ) {
- return true;
- } else
- if (uncalculated == 1) {
- if (incount < outcount) {
- EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
- } else {
- EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
- }
- DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
- << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
- removed = edgetocalc;
- return true;
- } else
- if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
- setEdgeWeight(edgetocalc, incount * 10);
- removed = edgetocalc;
- return true;
- } else {
- return false;
- }
-}
-
-static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
- double w = PI->getEdgeWeight(e);
- if (w != ProfileInfo::MissingValue) {
- calcw += w;
- } else {
- misscount.insert(e);
- }
-}
-
-template<>
-bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
- double inWeight = 0;
- std::set<Edge> inMissing;
- std::set<const BasicBlock*> ProcessedPreds;
- const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi == bbe) {
- readEdge(this,getEdge(0,BB),inWeight,inMissing);
- }
- for( ; bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
- }
- }
-
- double outWeight = 0;
- std::set<Edge> outMissing;
- std::set<const BasicBlock*> ProcessedSuccs;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi == sbbe)
- readEdge(this,getEdge(BB,0),outWeight,outMissing);
- for ( ; sbbi != sbbe; ++sbbi ) {
- if (ProcessedSuccs.insert(*sbbi).second) {
- readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
- }
- }
-
- double share;
- std::set<Edge>::iterator ei,ee;
- if (inMissing.size() == 0 && outMissing.size() > 0) {
- ei = outMissing.begin();
- ee = outMissing.end();
- share = inWeight/outMissing.size();
- setExecutionCount(BB,inWeight);
- } else
- if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
- ei = inMissing.begin();
- ee = inMissing.end();
- share = 0;
- setExecutionCount(BB,0);
- } else
- if (inMissing.size() == 0 && outMissing.size() == 0) {
- setExecutionCount(BB,outWeight);
- return true;
- } else {
- return false;
- }
- for ( ; ei != ee; ++ei ) {
- setEdgeWeight(*ei,share);
- }
- return true;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
-// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
-// for (Function::const_iterator FI = F->begin(), FE = F->end();
-// FI != FE; ++FI) {
-// const BasicBlock* BB = &(*FI);
-// {
-// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
-// if (NBB == End) {
-// setEdgeWeight(getEdge(0,BB),0);
-// }
-// for(;NBB != End; ++NBB) {
-// setEdgeWeight(getEdge(*NBB,BB),0);
-// }
-// }
-// {
-// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-// if (NBB == End) {
-// setEdgeWeight(getEdge(0,BB),0);
-// }
-// for(;NBB != End; ++NBB) {
-// setEdgeWeight(getEdge(*NBB,BB),0);
-// }
-// }
-// }
-// return;
-// }
- // The set of BasicBlocks that are still unvisited.
- std::set<const BasicBlock*> Unvisited;
-
- // The set of return edges (Edges with no successors).
- std::set<Edge> ReturnEdges;
- double ReturnWeight = 0;
-
- // First iterate over the whole function and collect:
- // 1) The blocks in this function in the Unvisited set.
- // 2) The return edges in the ReturnEdges set.
- // 3) The flow that is leaving the function already via return edges.
-
- // Data structure for searching the function.
- std::queue<const BasicBlock *> BFS;
- const BasicBlock *BB = &(F->getEntryBlock());
- BFS.push(BB);
- Unvisited.insert(BB);
-
- while (BFS.size()) {
- BB = BFS.front(); BFS.pop();
- succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- if (NBB == End) {
- Edge e = getEdge(BB,0);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
-        // If the return edge has no value, try to read the value from the block.
- double bw = getExecutionCount(BB);
- if (bw != MissingValue) {
- setEdgeWeight(e,bw);
- ReturnWeight += bw;
- } else {
- // If both return edge and block provide no value, collect edge.
- ReturnEdges.insert(e);
- }
- } else {
- // If the return edge has a proper value, collect it.
- ReturnWeight += w;
- }
- }
- for (;NBB != End; ++NBB) {
- if (Unvisited.insert(*NBB).second) {
- BFS.push(*NBB);
- }
- }
- }
-
- while (Unvisited.size() > 0) {
- unsigned oldUnvisitedCount = Unvisited.size();
- bool FoundPath = false;
-
- // If there is only one edge left, calculate it.
- if (ReturnEdges.size() == 1) {
- ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
-
- Edge e = *ReturnEdges.begin();
- setEdgeWeight(e,ReturnWeight);
- setExecutionCount(e.first,ReturnWeight);
-
- Unvisited.erase(e.first);
- ReturnEdges.erase(e);
- continue;
- }
-
-    // Calculate all blocks where only one edge is missing; this may also
-    // resolve further return edges.
- std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- Edge e;
- if(CalculateMissingEdge(BB,e,true)) {
- if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
- setExecutionCount(BB,getExecutionCount(BB));
- }
- Unvisited.erase(BB);
- if (e.first != 0 && e.second == 0) {
- ReturnEdges.erase(e);
- ReturnWeight += getEdgeWeight(e);
- }
- }
- }
- if (oldUnvisitedCount > Unvisited.size()) continue;
-
- // Estimate edge weights by dividing the flow proportionally.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest = 0;
- bool AllEdgesHaveSameReturn = true;
-      // Check each successor; these must all end up in the same or an empty
-      // return block, otherwise it's dangerous to do an estimation on them.
- for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
- Succ != End; ++Succ) {
- Path P;
- GetPath(*Succ, 0, P, GetPathToExit);
- if (Dest && Dest != P[(const BasicBlock*)0]) {
- AllEdgesHaveSameReturn = false;
- }
- Dest = P[(const BasicBlock*)0];
- }
- if (AllEdgesHaveSameReturn) {
- if(EstimateMissingEdges(BB)) {
- Unvisited.erase(BB);
- break;
- }
- }
- }
- if (oldUnvisitedCount > Unvisited.size()) continue;
-
-    // Check if there is a path to a block that has a known value and redirect
- // flow accordingly.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- // Fetch path.
- const BasicBlock *BB = *FI; ++FI;
- Path P;
- const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
-
- // Calculate incoming flow.
- double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- invalid++;
- } else {
-            // If the path contains the successor, this means it's a backedge,
- // do not count as missing.
- if (P.find(*NBB) == P.end())
- inmissing++;
- }
- incount++;
- }
- }
- if (inmissing == incount) continue;
- if (invalid == 0) continue;
-
- // Subtract (already) outgoing flow.
- Processed.clear();
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(BB, *NBB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw -= ew;
- }
- }
- }
- if (iw < 0) continue;
-
-      // Check whether the receiving end of the path can handle the flow.
- double ow = getExecutionCount(Dest);
- Processed.clear();
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(BB, *NBB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- ow -= ew;
- }
- }
- }
- if (ow < 0) continue;
-
- // Determine how much flow shall be used.
- double ew = getEdgeWeight(getEdge(P[Dest],Dest));
- if (ew != MissingValue) {
- ew = ew<ow?ew:ow;
- ew = ew<iw?ew:iw;
- } else {
- if (inmissing == 0)
- ew = iw;
- }
-
- // Create flow.
- if (ew != MissingValue) {
- do {
- Edge e = getEdge(P[Dest],Dest);
- if (getEdgeWeight(e) == MissingValue) {
- setEdgeWeight(e,ew);
- FoundPath = true;
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Calculate a block with self loop.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
- bool SelfEdgeFound = false;
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (*NBB == BB) {
- SelfEdgeFound = true;
- break;
- }
- }
- if (SelfEdgeFound) {
- Edge e = getEdge(BB,BB);
- if (getEdgeWeight(e) == MissingValue) {
- double iw = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- }
- }
- }
- setEdgeWeight(e,iw * 10);
- FoundPath = true;
- }
- }
- }
- if (FoundPath) continue;
-
-    // Determine backedges and set them to zero.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest = 0;
- Path P;
- bool BackEdgeFound = false;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
- if (Dest == *NBB) {
- BackEdgeFound = true;
- break;
- }
- }
- if (BackEdgeFound) {
- Edge e = getEdge(Dest,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- }
- do {
- Edge e = getEdge(P[Dest], Dest);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Channel flow to return block.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
-
- Path P;
- const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
- Dest = P[(const BasicBlock*)0];
- if (!Dest) continue;
-
- if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
- // Calculate incoming flow.
- double iw = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- }
- }
- }
- do {
- Edge e = getEdge(P[Dest], Dest);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,iw);
- FoundPath = true;
- } else {
- assert(0 && "Edge should not have value already!");
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Speculatively set edges to zero.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
-
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- Edge e = getEdge(*NBB,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- break;
- }
- }
- }
- if (FoundPath) continue;
-
- errs() << "{";
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- dbgs() << BB->getName();
- if (FI != FE)
- dbgs() << ",";
- }
- errs() << "}";
-
- errs() << "ASSERT: could not repair function";
- assert(0 && "could not repair function");
- }
-
- EdgeWeights J = EdgeInformation[F];
- for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
- Edge e = EI->first;
-
- bool SuccFound = false;
- if (e.first != 0) {
- succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
- if (NBB == End) {
- if (0 == e.second) {
- SuccFound = true;
- }
- }
- for (;NBB != End; ++NBB) {
- if (*NBB == e.second) {
- SuccFound = true;
- break;
- }
- }
- if (!SuccFound) {
- removeEdge(e);
- }
- }
- }
-}
-
-raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
- return O << MF->getFunction()->getName() << "(MF)";
-}
-
-raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
- return O << MBB->getBasicBlock()->getName() << "(MB)";
-}
-
-raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
- O << "(";
-
- if (E.first)
- O << E.first;
- else
- O << "0";
-
- O << ",";
-
- if (E.second)
- O << E.second;
- else
- O << "0";
-
- return O << ")";
-}
-
-} // namespace llvm
-
-//===----------------------------------------------------------------------===//
-// NoProfile ProfileInfo implementation
-//
-
-namespace {
- struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
- static char ID; // Class identification, replacement for typeinfo
- NoProfileInfo() : ImmutablePass(ID) {
- initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- virtual const char *getPassName() const {
- return "NoProfileInfo";
- }
- };
-} // End of anonymous namespace
-
-char NoProfileInfo::ID = 0;
-// Register this pass...
-INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
- "No Profile Information", false, true, true)
-
-ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
deleted file mode 100644
index f1f3e94..0000000
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- ProfileInfoLoad.cpp - Load profile information from disk -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ProfileInfoLoader class is used to load and represent profiling
-// information read in from the dump file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-// ByteSwap - Byteswap 'Var' if 'Really' is true.
-//
-static inline unsigned ByteSwap(unsigned Var, bool Really) {
- if (!Really) return Var;
- return ((Var & (255U<< 0U)) << 24U) |
- ((Var & (255U<< 8U)) << 8U) |
- ((Var & (255U<<16U)) >> 8U) |
- ((Var & (255U<<24U)) >> 24U);
-}
-
-static unsigned AddCounts(unsigned A, unsigned B) {
- // If either value is undefined, use the other.
- if (A == ProfileInfoLoader::Uncounted) return B;
- if (B == ProfileInfoLoader::Uncounted) return A;
- return A + B;
-}
-
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- std::vector<unsigned> &Data) {
- // Read the number of entries...
- unsigned NumEntries;
- if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
- errs() << ToolName << ": data packet truncated!\n";
- perror(0);
- exit(1);
- }
- NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
-
- // Read the counts...
- std::vector<unsigned> TempSpace(NumEntries);
-
- // Read in the block of data...
- if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
- errs() << ToolName << ": data packet truncated!\n";
- perror(0);
- exit(1);
- }
-
- // Make sure we have enough space... The space is initialised to -1 to
-  // facilitate the loading of missing values for OptimalEdgeProfiling.
- if (Data.size() < NumEntries)
- Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
-
-  // Accumulate the data we just read into the Data vector.
- if (!ShouldByteSwap) {
- for (unsigned i = 0; i != NumEntries; ++i) {
- Data[i] = AddCounts(TempSpace[i], Data[i]);
- }
- } else {
- for (unsigned i = 0; i != NumEntries; ++i) {
- Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
- }
- }
-}
-
-const unsigned ProfileInfoLoader::Uncounted = ~0U;
-
-// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
-// program if the file is invalid or broken.
-//
-ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
- const std::string &Filename)
- : Filename(Filename) {
- FILE *F = fopen(Filename.c_str(), "rb");
- if (F == 0) {
- errs() << ToolName << ": Error opening '" << Filename << "': ";
- perror(0);
- exit(1);
- }
-
- // Keep reading packets until we run out of them.
- unsigned PacketType;
- while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
- // If the low eight bits of the packet are zero, we must be dealing with an
- // endianness mismatch. Byteswap all words read from the profiling
- // information.
- bool ShouldByteSwap = (char)PacketType == 0;
- PacketType = ByteSwap(PacketType, ShouldByteSwap);
-
- switch (PacketType) {
- case ArgumentInfo: {
- unsigned ArgLength;
- if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
- errs() << ToolName << ": arguments packet truncated!\n";
- perror(0);
- exit(1);
- }
- ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
-
- // Read in the arguments...
- std::vector<char> Chars(ArgLength+4);
-
- if (ArgLength)
- if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
- errs() << ToolName << ": arguments packet truncated!\n";
- perror(0);
- exit(1);
- }
- CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
- break;
- }
-
- case FunctionInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
- break;
-
- case BlockInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
- break;
-
- case EdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
- break;
-
- case OptEdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
- break;
-
- case BBTraceInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
- break;
-
- default:
- errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
- exit(1);
- }
- }
-
- fclose(F);
-}
-
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
deleted file mode 100644
index 346f8d6..0000000
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete implementation of profiling information that
-// loads the information from a profile dump file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-
-static cl::opt<std::string>
-ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Profile file loaded by -profile-loader"));
-
-namespace {
- class LoaderPass : public ModulePass, public ProfileInfo {
- std::string Filename;
- std::set<Edge> SpanningTree;
- std::set<const BasicBlock*> BBisUnvisited;
- unsigned ReadCount;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit LoaderPass(const std::string &filename = "")
- : ModulePass(ID), Filename(filename) {
- initializeLoaderPassPass(*PassRegistry::getPassRegistry());
- if (filename.empty()) Filename = ProfileInfoFilename;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- virtual const char *getPassName() const {
- return "Profiling information loader";
- }
-
-    // recurseBasicBlock() - Calculates the edge weights for as many basic
-    // blocks as possible.
- virtual void recurseBasicBlock(const BasicBlock *BB);
- virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
- virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- /// run - Load the profile information from the specified file.
- virtual bool runOnModule(Module &M);
- };
-} // End of anonymous namespace
-
-char LoaderPass::ID = 0;
-INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
- "Load profile information from llvmprof.out", false, true, false)
-
-char &llvm::ProfileLoaderPassID = LoaderPass::ID;
-
-ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
-
-/// createProfileLoaderPass - This function returns a Pass that loads the
-/// profiling information for the module from the specified filename, making it
-/// available to the optimizers.
-Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
- return new LoaderPass(Filename);
-}
-
-void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
- unsigned &uncalc, double &count) {
- double w;
- if ((w = getEdgeWeight(edge)) == MissingValue) {
- tocalc = edge;
- uncalc++;
- } else {
- count+=w;
- }
-}
-
-// recurseBasicBlock - Visits all neighbours of a block and then tries to
-// calculate the missing edge values.
-void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
-
- // break recursion if already visited
- if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
- BBisUnvisited.erase(BB);
- if (!BB) return;
-
- for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
- for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
-
- Edge tocalc;
- if (CalculateMissingEdge(BB, tocalc)) {
- SpanningTree.erase(tocalc);
- }
-}
-
-void LoaderPass::readEdge(ProfileInfo::Edge e,
- std::vector<unsigned> &ECs) {
- if (ReadCount < ECs.size()) {
- double weight = ECs[ReadCount++];
- if (weight != ProfileInfoLoader::Uncounted) {
- // Here the data realm changes from the unsigned of the file to the
-      // double of the ProfileInfo. This conversion is safe because we know
-      // that everything that's representable in unsigned is also representable
- // in double.
- EdgeInformation[getFunction(e)][e] += (double)weight;
-
- DEBUG(dbgs() << "--Read Edge Counter for " << e
- << " (# "<< (ReadCount-1) << "): "
- << (unsigned)getEdgeWeight(e) << "\n");
- } else {
- // This happens only if reading optimal profiling information, not when
- // reading regular profiling information.
- SpanningTree.insert(e);
- }
- }
-}
-
-bool LoaderPass::runOnModule(Module &M) {
- ProfileInfoLoader PIL("profile-loader", Filename);
-
- EdgeInformation.clear();
- std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- readEdge(getEdge(0,&F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
- }
- }
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
- }
-
- Counters = PIL.getRawOptimalEdgeCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- readEdge(getEdge(0,&F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0) {
- readEdge(getEdge(BB,0), Counters);
- }
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
- }
- }
- while (SpanningTree.size() > 0) {
-
- unsigned size = SpanningTree.size();
-
- BBisUnvisited.clear();
- for (std::set<Edge>::iterator ei = SpanningTree.begin(),
- ee = SpanningTree.end(); ei != ee; ++ei) {
- BBisUnvisited.insert(ei->first);
- BBisUnvisited.insert(ei->second);
- }
- while (BBisUnvisited.size() > 0) {
- recurseBasicBlock(*BBisUnvisited.begin());
- }
-
- if (SpanningTree.size() == size) {
- DEBUG(dbgs()<<"{");
- for (std::set<Edge>::iterator ei = SpanningTree.begin(),
- ee = SpanningTree.end(); ei != ee; ++ei) {
- DEBUG(dbgs()<< *ei <<",");
- }
- assert(0 && "No edge calculated!");
- }
-
- }
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
- }
-
- BlockInformation.clear();
- Counters = PIL.getRawBlockCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (ReadCount < Counters.size())
- // Here the data realm changes from the unsigned of the file to the
-          // double of the ProfileInfo. This conversion is safe because we know
-          // that everything that's representable in unsigned is also
- // representable in double.
- BlockInformation[F][BB] = (double)Counters[ReadCount++];
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- }
-
- FunctionInformation.clear();
- Counters = PIL.getRawFunctionCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- if (ReadCount < Counters.size())
- // Here the data realm changes from the unsigned of the file to the
-        // double of the ProfileInfo. This conversion is safe because we know
-        // that everything that's representable in unsigned is also
- // representable in double.
- FunctionInformation[F] = (double)Counters[ReadCount++];
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- }
-
- return false;
-}
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
deleted file mode 100644
index c8896de..0000000
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that checks profiling information for
-// plausibility.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-verifier"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-static cl::opt<bool,false>
-ProfileVerifierDisableAssertions("profile-verifier-noassert",
- cl::desc("Disable assertions"));
-
-namespace {
- template<class FType, class BType>
- class ProfileVerifierPassT : public FunctionPass {
-
- struct DetailedBlockInfo {
- const BType *BB;
- double BBWeight;
- double inWeight;
- int inCount;
- double outWeight;
- int outCount;
- };
-
- ProfileInfoT<FType, BType> *PI;
- std::set<const BType*> BBisVisited;
- std::set<const FType*> FisVisited;
- bool DisableAssertions;
-
- // When debugging is enabled, the verifier prints a whole slew of debug
-    // information, otherwise it's just the assert. These are all the helper
- // functions.
- bool PrintedDebugTree;
- std::set<const BType*> BBisPrinted;
- void debugEntry(DetailedBlockInfo*);
- void printDebugInfo(const BType *BB);
-
- public:
- static char ID; // Class identification, replacement for typeinfo
-
- explicit ProfileVerifierPassT () : FunctionPass(ID) {
- initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
- DisableAssertions = ProfileVerifierDisableAssertions;
- }
- explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
- DisableAssertions(da) {
- initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<ProfileInfoT<FType, BType> >();
- }
-
- const char *getPassName() const {
- return "Profiling information verifier";
- }
-
- /// run - Verify the profile information.
- bool runOnFunction(FType &F);
- void recurseBasicBlock(const BType*);
-
- bool exitReachable(const FType*);
- double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
- void CheckValue(bool, const char*, DetailedBlockInfo*);
- };
-
- typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
-
- if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
-
- double BBWeight = PI->getExecutionCount(BB);
- if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
- double inWeight = 0;
- int inCount = 0;
- std::set<const BType*> ProcessedPreds;
- for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
- dbgs() << "calculated in-edge " << E << ": "
- << format("%20.20g",EdgeWeight) << "\n";
- inWeight += EdgeWeight;
- inCount++;
- }
- }
- double outWeight = 0;
- int outCount = 0;
- std::set<const BType*> ProcessedSuccs;
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
- dbgs() << "calculated out-edge " << E << ": "
- << format("%20.20g",EdgeWeight) << "\n";
- outWeight += EdgeWeight;
- outCount++;
- }
- }
- dbgs() << "Block " << BB->getName() << " in "
- << BB->getParent()->getName() << ":"
- << "BBWeight=" << format("%20.20g",BBWeight) << ","
- << "inWeight=" << format("%20.20g",inWeight) << ","
- << "inCount=" << inCount << ","
- << "outWeight=" << format("%20.20g",outWeight) << ","
- << "outCount" << outCount << "\n";
-
- // mark as visited and recurse into subnodes
- BBisPrinted.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- printDebugInfo(*bbi);
- }
- }
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
- dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in "
- << DI->BB->getParent()->getName() << ":"
- << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
- << "inWeight=" << format("%20.20g",DI->inWeight) << ","
- << "inCount=" << DI->inCount << ","
- << "outWeight=" << format("%20.20g",DI->outWeight) << ","
- << "outCount=" << DI->outCount << "\n";
- if (!PrintedDebugTree) {
- PrintedDebugTree = true;
- printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
- }
- }
-
- // This compares A and B for equality.
- static bool Equals(double A, double B) {
- return A == B;
- }
-
-  // This checks if the function "exit" is reachable from a given function
-  // via calls; this is necessary to check if a profile is valid despite the
- // counts not fitting exactly.
- template<class FType, class BType>
- bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
- if (!F) return false;
-
- if (FisVisited.count(F)) return false;
-
- FType *Exit = F->getParent()->getFunction("exit");
- if (Exit == F) {
- return true;
- }
-
- FisVisited.insert(F);
- bool exits = false;
- for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
- FType *F = CI->getCalledFunction();
- if (F) {
- exits |= exitReachable(F);
- } else {
- // This is a call to a pointer, all bets are off...
- exits = true;
- }
- if (exits) break;
- }
- }
- return exits;
- }
-
- #define ASSERTMESSAGE(M) \
- { dbgs() << "ASSERT:" << (M) << "\n"; \
- if (!DisableAssertions) assert(0 && (M)); }
-
- template<class FType, class BType>
- double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
- dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
- ASSERTMESSAGE("Edge has missing value");
- return 0;
- } else {
- if (EdgeWeight < 0) {
- dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
- ASSERTMESSAGE("Edge has negative value");
- }
- return EdgeWeight;
- }
- }
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
- const char *Message,
- DetailedBlockInfo *DI) {
- if (Error) {
- DEBUG(debugEntry(DI));
- dbgs() << "Block " << DI->BB->getName() << " in Function "
- << DI->BB->getParent()->getName() << ": ";
- ASSERTMESSAGE(Message);
- }
- return;
- }
-
- // This calculates the Information for a block and then recurses into the
- // successors.
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
-
- // Break the recursion by remembering all visited blocks.
- if (BBisVisited.find(BB) != BBisVisited.end()) return;
-
-  // Use a data structure to store all the information; this can then be handed
- // to debug printers.
- DetailedBlockInfo DI;
- DI.BB = BB;
- DI.outCount = DI.inCount = 0;
- DI.inWeight = DI.outWeight = 0;
-
- // Read predecessors.
- std::set<const BType*> ProcessedPreds;
- const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
- // If there are none, check for (0,BB) edge.
- if (bpi == bpe) {
- DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
- DI.inCount++;
- }
- for (;bpi != bpe; ++bpi) {
- if (ProcessedPreds.insert(*bpi).second) {
- DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
- DI.inCount++;
- }
- }
-
- // Read successors.
- std::set<const BType*> ProcessedSuccs;
- succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-  // If there is a (0,BB) edge, consider it too. (This is done not only when
- // there are no successors, but every time; not every function contains
- // return blocks with no successors (think loop latch as return block)).
- double w = PI->getEdgeWeight(PI->getEdge(BB,0));
- if (w != ProfileInfoT<FType, BType>::MissingValue) {
- DI.outWeight += w;
- DI.outCount++;
- }
- for (;bbi != bbe; ++bbi) {
- if (ProcessedSuccs.insert(*bbi).second) {
- DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
- DI.outCount++;
- }
- }
-
- // Read block weight.
- DI.BBWeight = PI->getExecutionCount(BB);
- CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
- "BasicBlock has missing value", &DI);
- CheckValue(DI.BBWeight < 0,
- "BasicBlock has negative value", &DI);
-
- // Check if this block is a setjmp target.
- bool isSetJmpTarget = false;
- if (DI.outWeight > DI.inWeight) {
- for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FType *F = CI->getCalledFunction();
- if (F && (F->getName() == "_setjmp")) {
- isSetJmpTarget = true; break;
- }
- }
- }
- }
-  // Check if this block eventually reaches exit.
- bool isExitReachable = false;
- if (DI.inWeight > DI.outWeight) {
- for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FType *F = CI->getCalledFunction();
- if (F) {
- FisVisited.clear();
- isExitReachable |= exitReachable(F);
- } else {
- // This is a call to a pointer, all bets are off...
- isExitReachable = true;
- }
- if (isExitReachable) break;
- }
- }
- }
-
- if (DI.inCount > 0 && DI.outCount == 0) {
- // If this is a block with no successors.
- if (!isSetJmpTarget) {
- CheckValue(!Equals(DI.inWeight,DI.BBWeight),
- "inWeight and BBWeight do not match", &DI);
- }
- } else if (DI.inCount == 0 && DI.outCount > 0) {
- // If this is a block with no predecessors.
- if (!isExitReachable)
- CheckValue(!Equals(DI.BBWeight,DI.outWeight),
- "BBWeight and outWeight do not match", &DI);
- } else {
- // If this block has successors and predecessors.
- if (DI.inWeight > DI.outWeight && !isExitReachable)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "inWeight and outWeight do not match", &DI);
- if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "inWeight and outWeight do not match", &DI);
- }
-
-
-  // Mark this block as visited, recurse into successors.
- BBisVisited.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- recurseBasicBlock(*bbi);
- }
- }
-
- template<class FType, class BType>
- bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
- PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
- if (!PI)
- ASSERTMESSAGE("No ProfileInfo available");
-
- // Prepare global variables.
- PrintedDebugTree = false;
- BBisVisited.clear();
-
- // Fetch entry block and recurse into it.
- const BType *entry = &F.getEntryBlock();
- recurseBasicBlock(entry);
-
- if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
- ASSERTMESSAGE("Function count and entry block count do not match");
-
- return false;
- }
-
- template<class FType, class BType>
- char ProfileVerifierPassT<FType, BType>::ID = 0;
-}
-
-INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true)
-
-namespace llvm {
- FunctionPass *createProfileVerifierPass() {
- return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
- }
-}
-
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 8577025..5635688 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -9,6 +9,7 @@
// Detects single entry single exit regions in the control flow graph.
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "region"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -17,12 +18,9 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-
-#define DEBUG_TYPE "region"
#include "llvm/Support/Debug.h"
-
-#include <set>
#include <algorithm>
+#include <set>
using namespace llvm;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index f5d095b..0a02f4e 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2590,55 +2590,39 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
-const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
- TD->getTypeAllocSize(AllocTy));
+ return getConstant(IntTy, TD->getTypeAllocSize(AllocTy));
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ assert(Ty == IntTy && "Effective SCEV type doesn't match");
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
- Constant *C = ConstantExpr::getAlignOf(AllocTy);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-
-const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
+ StructType *STy,
unsigned FieldNo) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
+ if (TD) {
+ return getConstant(IntTy,
TD->getStructLayout(STy)->getElementOffset(FieldNo));
+ }
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
- Constant *FieldNo) {
- Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2703,12 +2687,15 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- if (Ty->isIntegerTy())
+ if (Ty->isIntegerTy()) {
return Ty;
+ }
   // The only other supported type is a pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- if (TD) return TD->getIntPtrType(getContext());
+
+ if (TD)
+ return TD->getIntPtrType(Ty);
// Without DataLayout, conservatively assume pointers are 64-bit.
return Type::getInt64Ty(getContext());
@@ -3101,15 +3088,26 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
Flags = setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);
- } else if (const GEPOperator *GEP =
- dyn_cast<GEPOperator>(BEValueV)) {
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
// about signed or unsigned overflow because pointers are
// unsigned but we may have a negative index from the base
- // pointer.
- if (GEP->isInBounds())
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds()) {
Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ }
+ } else if (const SubOperator *OBO =
+ dyn_cast<SubOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
}
const SCEV *StartVal = getSCEV(StartValueV);
@@ -3177,18 +3175,18 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+ Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!Base->getType()->getPointerElementType()->isSized())
+ return getUnknown(GEP);
// Don't blindly transfer the inbounds flag from the GEP instruction to the
// Add expression, because the Instruction may be guarded by control flow
// and the no-overflow bits may not be valid for the expression in any
// context.
- bool isInBounds = GEP->isInBounds();
+ SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
- Value *Base = GEP->getOperand(0);
- // Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
- return getUnknown(GEP);
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
@@ -3199,21 +3197,19 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+ const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
// Add the field offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, FieldOffset);
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
const SCEV *IndexS = getSCEV(Index);
// Getelementptr indices are signed.
IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
// Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
- isInBounds ? SCEV::FlagNSW :
- SCEV::FlagAnyWrap);
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
// Add the element offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
@@ -3224,8 +3220,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
const SCEV *BaseS = getSCEV(Base);
// Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseS, TotalOffset,
- isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+ return getAddExpr(BaseS, TotalOffset, Wrap);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -4616,25 +4611,17 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_SLT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
- if (EL.hasAnyInfo()) return EL;
- break;
- }
- case ICmpInst::ICMP_SGT: {
- ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, true, IsSubExpr);
- if (EL.hasAnyInfo()) return EL;
- break;
- }
- case ICmpInst::ICMP_ULT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: { // while (X < Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SLT;
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_UGT: {
- ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, false, IsSubExpr);
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: { // while (X > Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SGT;
+ ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -5072,15 +5059,21 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
/// original value V is returned.
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if we've folded this expression at this loop before.
- std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
- std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
- if (!Pair.second)
- return Pair.first->second ? Pair.first->second : V;
-
+ SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second ? Values[u].second : V;
+ }
+ Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0)));
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
- ValuesAtScopes[V][L] = C;
+ SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = C;
+ break;
+ }
+ }
return C;
}
@@ -5119,18 +5112,23 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
case scAddExpr: {
const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
- if (C->getType()->isPointerTy())
- C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ unsigned AS = PTy->getAddressSpace();
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
if (!C2) return 0;
// First pointer!
if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ unsigned AS = C2->getType()->getPointerAddressSpace();
std::swap(C, C2);
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
// The offsets have been converted to bytes. We can add bytes to an
// i8* by GEP with the byte count in the first index.
- C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
}
// Don't bother trying to sum two pointers. We probably can't
@@ -5138,8 +5136,8 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
if (C2->getType()->isPointerTy())
return 0;
- if (C->getType()->isPointerTy()) {
- if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
C = ConstantExpr::getGetElementPtr(C, C2);
@@ -6336,45 +6334,72 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
return false;
}
-/// getBECount - Subtract the end and start values and divide by the step,
-/// rounding up, to get the number of times the backedge is executed. Return
-/// CouldNotCompute if an intermediate computation overflows.
-const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
- const SCEV *End,
- const SCEV *Step,
- bool NoWrap) {
- assert(!isKnownNegative(Step) &&
- "This code doesn't handle negative strides yet!");
-
- Type *Ty = Start->getType();
-
- // When Start == End, we have an exact BECount == 0. Short-circuit this case
- // here because SCEV may not be able to determine that the unsigned division
- // after rounding is zero.
- if (Start == End)
- return getConstant(Ty, 0);
-
- const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
- const SCEV *Diff = getMinusSCEV(End, Start);
- const SCEV *RoundUp = getAddExpr(Step, NegOne);
-
- // Add an adjustment to the difference between End and Start so that
- // the division will effectively round up.
- const SCEV *Add = getAddExpr(Diff, RoundUp);
-
- if (!NoWrap) {
- // Check Add for unsigned overflow.
- // TODO: More sophisticated things could be done here.
- Type *WideTy = IntegerType::get(getContext(),
- getTypeSizeInBits(Ty) + 1);
- const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
- const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
- const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
- if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
- return getCouldNotCompute();
+// Verify whether a linear IV with a positive stride can overflow when used
+// in a less-than comparison, given the invariant term of the comparison, the
+// stride, and any NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+ APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
+ }
+
+ APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+ APInt MaxValue = APInt::getMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+}
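
A minimal standalone sketch of the unsigned branch of this check, assuming the LLVM ADT headers are available; the bit width, bound and stride below are illustrative values, not taken from the patch:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using namespace llvm;
  const unsigned BitWidth = 8;
  APInt MaxRHS(BitWidth, 250);                    // unsigned max of the invariant bound
  APInt MaxStrideMinusOne(BitWidth, 10 - 1);      // unsigned max of (Stride - 1)
  APInt MaxValue = APInt::getMaxValue(BitWidth);  // 255

  // Same test as the unsigned path above:
  // UMaxRHS + UMaxStrideMinusOne > UMaxValue  =>  the IV may wrap.
  bool MayWrap = (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
  assert(MayWrap && "250 + 9 exceeds 255, so overflow cannot be excluded");
  return 0;
}
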
+
+// Verify whether a linear IV with a negative stride can overflow when used
+// in a greater-than comparison, given the invariant term of the comparison,
+// the stride, and any NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MinRHS = getSignedRange(RHS).getSignedMin();
+ APInt MinValue = APInt::getSignedMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
}
- return getUDivExpr(Add, Step);
+ APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+ APInt MinValue = APInt::getMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+}
+
+// Compute the backedge taken count from the interval difference (Delta), the
+// stride, and whether the comparison includes equality.
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+ bool Equality) {
+ const SCEV *One = getConstant(Step->getType(), 1);
+ Delta = Equality ? getAddExpr(Delta, Step)
+ : getAddExpr(Delta, getMinusSCEV(Step, One));
+ return getUDivExpr(Delta, Step);
}
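
A plain-integer sketch of the rounding performed by computeBECount (the real function builds SCEV expressions and an unsigned divide; the numbers are purely illustrative):

#include <cassert>
#include <cstdint>

// Mirrors computeBECount on ordinary integers: round up for a strict
// comparison, add one full step when the comparison includes equality.
static uint64_t becountSketch(uint64_t Delta, uint64_t Step, bool Equality) {
  return (Equality ? Delta + Step : Delta + (Step - 1)) / Step;
}

int main() {
  assert(becountSketch(10, 4, false) == 3);  // ceil(10 / 4)
  assert(becountSketch(10, 4, true)  == 3);  // floor(10 / 4) + 1
  assert(becountSketch(8,  4, false) == 2);  // exact multiple: ceil(8 / 4)
  assert(becountSketch(8,  4, true)  == 3);  // floor(8 / 4) + 1
  return 0;
}
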
/// HowManyLessThans - Return the number of times a backedge containing the
@@ -6386,119 +6411,144 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
/// a subexpression that cannot overflow before evaluating true.
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned,
+ const Loop *L, bool IsSigned,
bool IsSubExpr) {
- // Only handle: "ADDREC < LoopInvariant".
- if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+ // We handle only IV < Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!AddRec || AddRec->getLoop() != L)
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+ // Avoid weird loops
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- // Check to see if we have a flag which makes analysis easy.
- bool NoWrap = false;
- if (!IsSubExpr) {
- NoWrap = AddRec->getNoWrapFlags(
- (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
- | SCEV::FlagNW));
- }
- if (AddRec->isAffine()) {
- unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
- const SCEV *Step = AddRec->getStepRecurrence(*this);
+ bool NoWrap = !IsSubExpr &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
- if (Step->isZero())
- return getCouldNotCompute();
- if (Step->isOne()) {
- // With unit stride, the iteration never steps past the limit value.
- } else if (isKnownPositive(Step)) {
- // Test whether a positive iteration can step past the limit
- // value and past the maximum value for its type in a single step.
- // Note that it's not sufficient to check NoWrap here, because even
- // though the value after a wrap is undefined, it's not undefined
- // behavior, so if wrap does occur, the loop could either terminate or
- // loop infinitely, but in either case, the loop is guaranteed to
- // iterate at least until the iteration where the wrapping occurs.
- const SCEV *One = getConstant(Step->getType(), 1);
- if (isSigned) {
- APInt Max = APInt::getSignedMaxValue(BitWidth);
- if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
- .slt(getSignedRange(RHS).getSignedMax()))
- return getCouldNotCompute();
- } else {
- APInt Max = APInt::getMaxValue(BitWidth);
- if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
- .ult(getUnsignedRange(RHS).getUnsignedMax()))
- return getCouldNotCompute();
- }
- } else
- // TODO: Handle negative strides here and below.
- return getCouldNotCompute();
+ const SCEV *Stride = IV->getStepRecurrence(*this);
- // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
- // m. So, we count the number of iterations in which {n,+,s} < m is true.
- // Note that we cannot simply return max(m-n,0)/s because it's not safe to
- // treat m-n as signed nor unsigned due to overflow possibility.
-
- // First, we get the value of the LHS in the first iteration: n
- const SCEV *Start = AddRec->getOperand(0);
-
- // Determine the minimum constant start value.
- const SCEV *MinStart = getConstant(isSigned ?
- getSignedRange(Start).getSignedMin() :
- getUnsignedRange(Start).getUnsignedMin());
-
- // If we know that the condition is true in order to enter the loop,
- // then we know that it will run exactly (m-n)/s times. Otherwise, we
- // only know that it will execute (max(m,n)-n)/s times. In both cases,
- // the division must round up.
- const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L,
- isSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT,
- getMinusSCEV(Start, Step), RHS))
- End = isSigned ? getSMaxExpr(RHS, Start)
- : getUMaxExpr(RHS, Start);
-
- // Determine the maximum constant end value.
- const SCEV *MaxEnd = getConstant(isSigned ?
- getSignedRange(End).getSignedMax() :
- getUnsignedRange(End).getUnsignedMax());
-
- // If MaxEnd is within a step of the maximum integer value in its type,
- // adjust it down to the minimum value which would produce the same effect.
- // This allows the subsequent ceiling division of (N+(step-1))/step to
- // compute the correct value.
- const SCEV *StepMinusOne = getMinusSCEV(Step,
- getConstant(Step->getType(), 1));
- MaxEnd = isSigned ?
- getSMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
- StepMinusOne)) :
- getUMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
- StepMinusOne));
-
- // Finally, we subtract these two values and divide, rounding up, to get
- // the number of times the backedge is executed.
- const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
-
- // The maximum backedge count is similar, except using the minimum start
- // value and the maximum end value.
- // If we already have an exact constant BECount, use it instead.
- const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
- : getBECount(MinStart, MaxEnd, Step, NoWrap);
-
- // If the stride is nonconstant, and NoWrap == true, then
- // getBECount(MinStart, MaxEnd) may not compute. This would result in an
- // exact BECount and invalid MaxBECount, which should be avoided to catch
- // more optimization opportunities.
- if (isa<SCEVCouldNotCompute>(MaxBECount))
- MaxBECount = BECount;
-
- return ExitLimit(BECount, MaxBECount);
- }
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
- return getCouldNotCompute();
+  // Avoid proven overflow cases: this will ensure that the backedge taken
+  // count will not generate any unsigned overflow. Relaxed no-overflow
+  // conditions exploit NoWrapFlags, allowing optimization in the presence of
+  // undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
+ : ICmpInst::ICMP_ULT;
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ End = IsSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
+ : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+  // Although End can be a MAX expression, we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
+}
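
To ground the Start/End/Stride roles above, a small loop this path applies to. The comments describe the expected analysis; the exact SCEV spelling, and whether the compare ends up using the pre- or post-increment value after loop rotation, are assumptions rather than reproduced -analyze output:

// Ordinary C++; only the loop shape matters for the analysis.
void zero_every_fourth(long *A, long N) {
  // The controlling IV is the affine AddRec {0,+,4} and the exit test is a
  // signed less-than against the loop-invariant N, so HowManyLessThans sees
  // Start = 0, Stride = 4, RHS = N, IsSigned = true.
  for (long I = 0; I < N; I += 4)
    A[I] = 0;
  // Unless loop entry is proven guarded by (Start - Stride) < N, End becomes
  // smax(N, 0), so the exact count is computeBECount(smax(N, 0) - 0, 4, false),
  // i.e. (smax(N, 0) + 3) /u 4. The max count instead uses the constant range
  // of N, clamped to SignedMax - 3, divided by the minimum stride 4.
}
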
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool IsSigned,
+ bool IsSubExpr) {
+ // We handle only IV > Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
+
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+ // Avoid weird loops
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
+ return getCouldNotCompute();
+
+ bool NoWrap = !IsSubExpr &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+
+ const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
+
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
+
+  // Avoid proven overflow cases: this will ensure that the backedge taken
+  // count will not generate any unsigned overflow. Relaxed no-overflow
+  // conditions exploit NoWrapFlags, allowing optimization in the presence of
+  // undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
+ : ICmpInst::ICMP_UGT;
+
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
+ End = IsSigned ? getSMinExpr(RHS, Start)
+ : getUMinExpr(RHS, Start);
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
+
+ APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
+ : getUnsignedRange(Start).getUnsignedMax();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
+ : APInt::getMinValue(BitWidth) + (MinStride - 1);
+
+  // Although End can be a MIN expression, we estimate MinEnd considering only
+ // the case End = RHS. This is safe because in the other case (Start - End)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MinEnd =
+ IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
+ : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -6627,7 +6677,534 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
return SE.getCouldNotCompute();
}
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue().abs();
+ APInt B = C2->getValue()->getValue().abs();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::GreatestCommonDivisor(A, B);
+}
+
+static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::srem(A, B);
+}
+
+static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::sdiv(A, B);
+}
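
The three helpers above share one idiom: bring both constants to a common bit width before the APInt operation. A minimal standalone sketch of that idiom on raw APInts, with illustrative widths and values:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using namespace llvm;
  APInt A(32, 12);  // e.g. a 32-bit step constant
  APInt B(64, 18);  // e.g. a 64-bit start constant

  // Same width normalization as gcd()/srem()/sdiv() above: extend the
  // narrower operand before operating.
  if (A.getBitWidth() < B.getBitWidth())
    A = A.zext(B.getBitWidth());
  else if (B.getBitWidth() < A.getBitWidth())
    B = B.zext(A.getBitWidth());

  assert(APIntOps::GreatestCommonDivisor(A, B) == 6 && "gcd(12, 18) == 6");
  return 0;
}
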
+
+namespace {
+struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
+public:
+ // Pattern match Step into Start. When Step is a multiply expression, find
+ // the largest subexpression of Step that appears in Start. When Start is an
+  // add expression, try to match Step in the subexpressions of Start;
+  // non-matching subexpressions are returned under Remainder.
+ static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
+ const SCEV *Step, const SCEV **Remainder) {
+ assert(Remainder && "Remainder should not be NULL");
+ SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
+ const SCEV *Res = R.visit(Start);
+ *Remainder = R.Remainder;
+ return Res;
+ }
+
+ SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
+ : SE(S), GCD(G), Remainder(R) {
+ Zero = SE.getConstant(GCD->getType(), 0);
+ One = SE.getConstant(GCD->getType(), 1);
+ }
+
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ if (GCD == Constant || Constant == Zero)
+ return GCD;
+
+ if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
+ const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
+ if (Res != One)
+ return Res;
+
+ Remainder = SE.getConstant(srem(Constant, CGCD));
+ Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
+ Res = SE.getConstant(gcd(Constant, CGCD));
+ return Res;
+ }
+
+ // When GCD is not a constant, it could be that the GCD is an Add, Mul,
+ // AddRec, etc., in which case we want to find out how many times the
+ // Constant divides the GCD: we then return that as the new GCD.
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
+
+ if (Res == One || Rem != Zero) {
+ Remainder = Constant;
+ return One;
+ }
+
+ assert(isa<SCEVConstant>(Res) && "Res should be a constant");
+ Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
+ return Res;
+ }
+
+ const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
+
+ // FIXME: There may be ambiguous situations: for instance,
+ // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
+ // The order in which the AddExpr is traversed computes a different GCD
+ // and Remainder.
+ if (Res != One)
+ GCD = Res;
+ if (Rem != Zero)
+ Remainder = SE.getAddExpr(Remainder, Rem);
+ }
+
+ return GCD;
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (Expr->getOperand(i) == GCD)
+ return GCD;
+ }
+
+ // If we have not returned yet, it means that GCD is not part of Expr.
+ const SCEV *PartialGCD = One;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+ if (Rem != Zero)
+ // GCD does not divide Expr->getOperand(i).
+ continue;
+
+ if (Res == GCD)
+ return GCD;
+ PartialGCD = SE.getMulExpr(PartialGCD, Res);
+ if (PartialGCD == GCD)
+ return GCD;
+ }
+
+ if (PartialGCD != One)
+ return PartialGCD;
+
+ Remainder = Expr;
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
+ if (!Mul)
+ return PartialGCD;
+
+ // When the GCD is a multiply expression, try to decompose it:
+ // this occurs when Step does not divide the Start expression
+ // as in: {(-4 + (3 * %m)),+,(2 * %m)}
+ for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
+ if (Rem == Zero) {
+ Remainder = Rem;
+ return Res;
+ }
+ }
+
+ return PartialGCD;
+ }
+
+ const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ if (!Expr->isAffine()) {
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
+ if (Rem != Zero)
+ Remainder = SE.getAddExpr(Remainder, Rem);
+
+ Rem = Zero;
+ Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
+ if (Rem != Zero) {
+ Remainder = Expr;
+ return GCD;
+ }
+
+ return Res;
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return One;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *GCD, *Remainder, *Zero, *One;
+};
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
+public:
+ // Remove from Start all multiples of Step.
+ static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
+ const SCEV *Step) {
+ SCEVDivision D(SE, Step);
+ const SCEV *Rem = D.Zero;
+ (void)Rem;
+ // The division is guaranteed to succeed: Step should divide Start with no
+ // remainder.
+ assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
+ "Step should divide Start with no remainder.");
+ return D.visit(Start);
+ }
+
+ SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
+ Zero = SE.getConstant(GCD->getType(), 0);
+ One = SE.getConstant(GCD->getType(), 1);
+ }
+
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ if (GCD == Constant)
+ return One;
+
+ if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
+ return SE.getConstant(sdiv(Constant, CGCD));
+ return Constant;
+ }
+
+ const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+
+ if (Operands.size() == 1)
+ return Operands[0];
+ return SE.getAddExpr(Operands);
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ bool FoundGCDTerm = false;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ if (Expr->getOperand(i) == GCD)
+ FoundGCDTerm = true;
+
+ SmallVector<const SCEV *, 2> Operands;
+ if (FoundGCDTerm) {
+ FoundGCDTerm = false;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (FoundGCDTerm)
+ Operands.push_back(Expr->getOperand(i));
+ else if (Expr->getOperand(i) == GCD)
+ FoundGCDTerm = true;
+ else
+ Operands.push_back(Expr->getOperand(i));
+ }
+ } else {
+ FoundGCDTerm = false;
+ const SCEV *PartialGCD = One;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (PartialGCD == GCD) {
+ Operands.push_back(Expr->getOperand(i));
+ continue;
+ }
+
+ const SCEV *Rem = Zero;
+ const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+ if (Rem == Zero) {
+ PartialGCD = SE.getMulExpr(PartialGCD, Res);
+ Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+ } else {
+ Operands.push_back(Expr->getOperand(i));
+ }
+ }
+ }
+
+ if (Operands.size() == 1)
+ return Operands[0];
+ return SE.getMulExpr(Operands);
+ }
+
+ const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ assert(Expr->isAffine() && "Expr should be affine");
+
+ const SCEV *Start = divide(SE, Expr->getStart(), GCD);
+ const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
+
+ return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
+ Expr->getNoWrapFlags());
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return Expr;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *GCD, *Zero, *One;
+};
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access. Returns the remainder of the delinearization,
+/// which is the start offset of the array. The SCEV->delinearize algorithm
+/// computes the multiples of the SCEV coefficients: it pattern-matches
+/// subexpressions in the stride and base of a SCEV, which corresponds to
+/// computing a GCD (greatest common divisor) of base and stride. When
+/// SCEV->delinearize fails, it returns the SCEV unchanged.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+/// void foo(long n, long m, long o, double A[n][m][o]) {
+///
+/// for (long i = 0; i < n; i++)
+/// for (long j = 0; j < m; j++)
+/// for (long k = 0; k < o; k++)
+/// A[i][j][k] = 1.0;
+/// }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that does not divide any of the strides in
+/// the loops:
+///
+/// CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array as these are the multiples by which the strides are happening:
+///
+/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+///
+/// Note that the outermost dimension remains of UnknownSize because there are
+/// no strides that would help identify the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the known
+/// dimensions: %m * %o * 8.
+///
+/// Finally, delinearize provides the access functions for the array reference
+/// that corresponds to A[i][j][k] in the above C testcase:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases are checking the output of a function pass:
+/// DelinearizationPass that walks through all loads and stores of a function
+/// asking for the SCEV of the memory access with respect to all enclosing
+/// loops, calling SCEV->delinearize on that and printing the results.
+const SCEV *
+SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) const {
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (!this->isAffine())
+ return this;
+
+ const SCEV *Start = this->getStart();
+ const SCEV *Step = this->getStepRecurrence(SE);
+
+  // Build the SCEV representation of the canonical induction variable in the
+ // loop of this SCEV.
+ const SCEV *Zero = SE.getConstant(this->getType(), 0);
+ const SCEV *One = SE.getConstant(this->getType(), 1);
+ const SCEV *IV =
+ SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
+
+ DEBUG(dbgs() << "(delinearize: " << *this << "\n");
+
+ // Currently we fail to delinearize when the stride of this SCEV is 1. We
+ // could decide to not fail in this case: we could just return 1 for the size
+ // of the subscript, and this same SCEV for the access function.
+ if (Step == One) {
+ DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+ return this;
+ }
+
+ // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
+ const SCEV *Remainder = NULL;
+ const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
+
+ DEBUG(dbgs() << "GCD: " << *GCD << "\n");
+ DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
+
+ // Same remark as above: we currently fail the delinearization, although we
+ // can very well handle this special case.
+ if (GCD == One) {
+ DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+ return this;
+ }
+
+ // As findGCD computed Remainder, GCD divides "Start - Remainder." The
+ // Quotient is then this SCEV without Remainder, scaled down by the GCD. The
+ // Quotient is what will be used in the next subscript delinearization.
+ const SCEV *Quotient =
+ SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
+ DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
+
+ const SCEV *Rem;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
+ // Recursively call delinearize on the Quotient until there are no more
+ // multiples that can be recognized.
+ Rem = AR->delinearize(SE, Subscripts, Sizes);
+ else
+ Rem = Quotient;
+
+  // Scale up the canonical induction variable IV by whatever remains from the
+  // Step after division by the GCD: the GCD is the size of all the sub-arrays.
+ if (Step != GCD) {
+ Step = SCEVDivision::divide(SE, Step, GCD);
+ IV = SE.getMulExpr(IV, Step);
+ }
+  // The access function in the current subscript is computed as the canonical
+ // induction variable IV (potentially scaled up by the step) and offset by
+ // Rem, the offset of delinearization in the sub-array.
+ const SCEV *Index = SE.getAddExpr(IV, Rem);
+
+ // Record the access function and the size of the current subscript.
+ Subscripts.push_back(Index);
+ Sizes.push_back(GCD);
+
+#ifndef NDEBUG
+ int Size = Sizes.size();
+ DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
+ DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
+ for (int i = 0; i < Size - 1; i++)
+ DEBUG(dbgs() << "[" << *Sizes[i] << "]");
+ DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
+
+ DEBUG(dbgs() << "ArrayRef");
+ for (int i = 0; i < Size; i++)
+ DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
+ DEBUG(dbgs() << "\n)\n");
+#endif
+
+ return Remainder;
+}
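
A hedged usage sketch of the new delinearize entry point; obtaining the ScalarEvolution analysis and the access SCEV is assumed to happen elsewhere, and the helper name is illustrative. It mirrors what the DelinearizationPass mentioned in the comment does for each load and store:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void printDelinearization(ScalarEvolution &SE, const SCEV *AccessFn) {
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
  if (!AR)
    return;

  SmallVector<const SCEV *, 4> Subscripts, Sizes;
  const SCEV *BaseOffset = AR->delinearize(SE, Subscripts, Sizes);

  // On success, BaseOffset is the remaining start offset of the array and the
  // two vectors are filled pairwise; on failure the SCEV is returned unchanged.
  errs() << "Base offset: " << *BaseOffset << "\n";
  for (unsigned i = 0, e = Subscripts.size(); i != e; ++i)
    errs() << "  subscript " << i << ": " << *Subscripts[i] << "  (size "
           << *Sizes[i] << ")\n";
}
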
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
@@ -6683,7 +7260,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), FirstUnknown(0) {
+ : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
@@ -6821,14 +7398,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
- std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
- std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, LoopVariant));
- if (!Pair.second)
- return Pair.first->second;
-
+ SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(L, LoopVariant));
LoopDisposition D = computeLoopDisposition(S, L);
- return LoopDispositions[S][L] = D;
+ SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::LoopDisposition
@@ -6920,14 +7504,21 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
- std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
- Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
- if (!Pair.second)
- return Pair.first->second;
-
+ SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == BB)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
BlockDisposition D = computeBlockDisposition(S, BB);
- return BlockDispositions[S][BB] = D;
+ SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == BB) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::BlockDisposition
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c434b40..86a557b 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -176,8 +177,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+ BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -191,13 +192,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
// If we haven't found this binop, insert it.
Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
- BO->setDebugLoc(SaveInsertPt->getDebugLoc());
+ BO->setDebugLoc(Loc);
rememberInstruction(BO);
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
return BO;
}
@@ -406,6 +403,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
+ Type *IntPtrTy = SE.TD
+ ? SE.TD->getIntPtrType(PTy)
+ : Type::getInt64Ty(PTy->getContext());
+
// Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
// indexes into the array implied by the pointer operand; the rest of
@@ -416,7 +417,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// array indexing.
SmallVector<const SCEV *, 8> ScaledOps;
if (ElTy->isSized()) {
- const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
if (!ElSize->isZero()) {
SmallVector<const SCEV *, 8> NewOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -548,8 +549,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -565,16 +565,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
rememberInstruction(GEP);
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
return GEP;
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -610,8 +605,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
rememberInstruction(GEP);
// Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Builder.restoreIP(SaveInsertPt);
return expand(SE.getAddExpr(Ops));
}
@@ -1076,8 +1070,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
// Another AddRec may need to be recursively expanded below. For example, if
// this AddRec is quadratic, the StepV may itself be an AddRec in this
@@ -1144,10 +1137,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
PN->addIncoming(IncV, Pred);
}
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
// After expanding subexpressions, restore the PostIncLoops set so the caller
// can ensure that IVIncrement dominates the current uses.
PostIncLoops = SavedPostIncLoops;
@@ -1232,19 +1221,19 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
!ExpandTy->isPointerTy() && Step->isNonConstantNegative();
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
- // Expand the step somewhere that dominates the loop header.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
- Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
- // Restore the insertion point to the place where the caller has
- // determined dominates all uses.
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Value *StepV;
+ {
+ // Expand the step somewhere that dominates the loop header.
+ BuilderType::InsertPointGuard Guard(Builder);
+ StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ }
Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
}
}
// Re-apply any non-loop-dominating scale.
if (PostLoopScale) {
+ assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateMul(Result,
expandCodeFor(PostLoopScale, IntTy));
@@ -1289,16 +1278,14 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
S->getNoWrapFlags(SCEV::FlagNW)));
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
BasicBlock::iterator NewInsertPt =
llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+ BuilderType::InsertPointGuard Guard(Builder);
while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
isa<LandingPadInst>(NewInsertPt))
++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1342,9 +1329,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Header->begin());
rememberInstruction(CanonicalIV);
+ SmallSet<BasicBlock *, 4> PredSeen;
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
BasicBlock *HP = *HPI;
+ if (!PredSeen.insert(HP))
+ continue;
+
if (L->contains(HP)) {
// Insert a unit add instruction right before the terminator
// corresponding to the back-edge.
@@ -1527,8 +1518,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (I != InsertedExpressions.end())
return I->second;
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
// Expand the expression into instructions.
@@ -1541,8 +1531,6 @@ Value *SCEVExpander::expand(const SCEV *S) {
// a postinc expansion, it could be reused by a non postinc user, but only if
// its insertion point was already at the head of the loop.
InsertedExpressions[std::make_pair(S, InsertPt)] = V;
-
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1553,10 +1541,6 @@ void SCEVExpander::rememberInstruction(Value *I) {
InsertedValues.insert(I);
}
-void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
- Builder.SetInsertPoint(BB, I);
-}
-
/// getOrInsertCanonicalInductionVariable - This method returns the
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
@@ -1572,11 +1556,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
// Emit code for it.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1724,28 +1705,43 @@ namespace {
// Currently, we only allow division by a nonzero constant here. If this is
// inadequate, we could easily allow division by SCEVUnknown by using
// ValueTracking to check isKnownNonZero().
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in a
+// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
+ ScalarEvolution &SE;
bool IsUnsafe;
- SCEVFindUnsafe(): IsUnsafe(false) {}
+ SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
bool follow(const SCEV *S) {
- const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
- if (!D)
- return true;
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
- if (SC && !SC->getValue()->isZero())
- return true;
- IsUnsafe = true;
- return false;
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (!SC || SC->getValue()->isZero()) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ return true;
}
bool isDone() const { return IsUnsafe; }
};
}
namespace llvm {
-bool isSafeToExpand(const SCEV *S) {
- SCEVFindUnsafe Search;
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
+ SCEVFindUnsafe Search(SE);
visitAll(S, Search);
return !Search.IsUnsafe;
}
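
A hedged sketch of a caller using the widened signature; the helper name, insertion point and expander are assumptions, while isSafeToExpand's new ScalarEvolution parameter and SCEVExpander::expandCodeFor are existing interfaces:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
using namespace llvm;

// Expand S only when the checker above accepts it; skipping the check could
// materialize a division by zero or a recurrence whose step is unavailable at
// the loop header.
static Value *tryExpand(const SCEV *S, Type *Ty, Instruction *InsertPt,
                        ScalarEvolution &SE, SCEVExpander &Expander) {
  if (!isSafeToExpand(S, SE))
    return 0;
  return Expander.expandCodeFor(S, Ty, InsertPt);
}
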
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index dd2ed4f..f110616 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -119,11 +119,19 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
switch (Kind) {
case NormalizeAutodetect:
- if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getMinusSCEV(Result, TransformedStep);
+ // Normalize this SCEV by subtracting the expression for the final step.
+    // We only allow affine AddRecs to be normalized; otherwise we would not
+ // be able to correctly denormalize.
+ // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
+ // Normalized form: {-2,+,1,+,2}
+ // Denormalized form: {1,+,3,+,2}
+ //
+    // However, denormalization would use a different step expression than
+ // normalization (see getPostIncExpr), generating the wrong final
+ // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
+ if (AR->isAffine() &&
+ IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
Loops.insert(L);
}
#if 0
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 4ad7162..0353295 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -96,6 +96,11 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return PrevTTI->isLoweredToCall(F);
}
+void TargetTransformInfo::getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const {
+ PrevTTI->getUnrollingPreferences(L, UP);
+}
+
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return PrevTTI->isLegalAddImmediate(Imm);
}
@@ -145,6 +150,10 @@ TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return PrevTTI->getPopcntSupport(IntTyWidthInBit);
}
+bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
+ return PrevTTI->haveFastSqrt(Ty);
+}
+
unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
return PrevTTI->getIntImmCost(Imm, Ty);
}
@@ -215,6 +224,11 @@ unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
return PrevTTI->getAddressComputationCost(Tp, IsComplex);
}
+unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwise) const {
+ return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
+}
+
namespace {
struct NoTTI : ImmutablePass, TargetTransformInfo {
@@ -265,26 +279,34 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
// Otherwise, the default basic cost is used.
return TCC_Basic;
- case Instruction::IntToPtr:
+ case Instruction::IntToPtr: {
+ if (!DL)
+ return TCC_Basic;
+
// An inttoptr cast is free so long as the input is a legal integer type
// which doesn't contain values outside the range of a pointer.
- if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
- OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits())
+ unsigned OpSize = OpTy->getScalarSizeInBits();
+ if (DL->isLegalInteger(OpSize) &&
+ OpSize <= DL->getPointerTypeSizeInBits(Ty))
return TCC_Free;
// Otherwise it's not a no-op.
return TCC_Basic;
+ }
+ case Instruction::PtrToInt: {
+ if (!DL)
+ return TCC_Basic;
- case Instruction::PtrToInt:
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
- if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
- Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ unsigned DestSize = Ty->getScalarSizeInBits();
+ if (DL->isLegalInteger(DestSize) &&
+ DestSize >= DL->getPointerTypeSizeInBits(OpTy))
return TCC_Free;
// Otherwise it's not a no-op.
return TCC_Basic;
-
+ }
case Instruction::Trunc:
// trunc to a native type is free (assuming the target has compare and
// shift-right of the same width).
@@ -457,6 +479,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return true;
}
+ void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { }
+
bool isLegalAddImmediate(int64_t Imm) const {
return false;
}
@@ -505,6 +529,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return PSK_Software;
}
+ bool haveFastSqrt(Type *Ty) const {
+ return false;
+ }
+
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
return 1;
}
@@ -569,6 +597,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
unsigned getAddressComputationCost(Type *Tp, bool) const {
return 0;
}
+
+ unsigned getReductionCost(unsigned, Type *, bool) const {
+ return 1;
+ }
};
} // end anonymous namespace
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index bbf3c3a..6791d4b 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -16,7 +16,12 @@
// typical C/C++ TBAA, but it can also be used to implement custom alias
// analysis behavior for other languages.
//
-// The current metadata format is very simple. TBAA MDNodes have up to
+// We now support two metadata formats: scalar TBAA and struct-path aware
+// TBAA. After all test cases are upgraded to use struct-path aware TBAA and
+// we can auto-upgrade existing bc files, the support for scalar TBAA
+// can be dropped.
+//
+// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
// !0 = metadata !{ metadata !"an example type tree" }
// !1 = metadata !{ metadata !"int", metadata !0 }
@@ -40,6 +45,65 @@
// should return true; see
// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
//
+// With struct-path aware TBAA, the MDNodes attached to an instruction using
+// "!tbaa" are called path tag nodes.
+//
+// The path tag node has 4 fields with the last field being optional.
+//
+// The first field is the base type node; it can be a struct type node
+// or a scalar type node. The second field is the access type node; it
+// must be a scalar type node. The third field is the offset into the base type.
+// The last field has the same meaning as the last field of our scalar TBAA:
+// it's an integer which, if equal to 1, indicates that the access is "constant".
+//
+// The struct type node has a name and a list of pairs, one pair for each member
+// of the struct. The first element of each pair is a type node (a struct type
+// node or a scalar type node), specifying the type of the member; the second
+// element of each pair is the offset of the member.
+//
+// Given an example
+// typedef struct {
+// short s;
+// } A;
+// typedef struct {
+// uint16_t s;
+// A a;
+// } B;
+//
+// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
+// instruction. The base type is !4 (struct B), the access type is !2 (scalar
+// type short) and the offset is 4.
+//
+// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
+// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
+// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
+// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
+// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
+// // Struct type node
+// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
+//
+// The struct type nodes and the scalar type nodes form a type DAG.
+// Root (!0)
+// char (!1) -- edge to Root
+// short (!2) -- edge to char
+// A (!3) -- edge with offset 0 to short
+// B (!4) -- edge with offset 0 to short and edge with offset 4 to A
+//
+// To check if two tags (tagX and tagY) can alias, we start from the base type
+// of tagX, follow the edge with the correct offset in the type DAG and adjust
+// the offset until we reach the base type of tagY or until we reach the Root
+// node.
+// If we reach the base type of tagY, compare the adjusted offset with the
+// offset of tagY; return Alias if the offsets are the same, NoAlias
+// otherwise.
+// If we reach the Root node, repeat the walk starting from the base type of
+// tagY to see if we reach the base type of tagX.
+//
+// If they have different roots, they're part of different potentially
+// unrelated type systems, so we return Alias to be conservative.
+// If neither node is an ancestor of the other and they have the same root,
+// then we say NoAlias.
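+//
+// For example, with the metadata above, a second tag for a plain short
+// access, say !6 = metadata !{metadata !2, metadata !2, i64 0}, aliases !5:
+// starting at !4 (B) with offset 4, we follow the offset-4 edge to !3 (A)
+// and adjust the offset to 0, then follow the offset-0 edge to !2 (short),
+// which is the base type of !6 with matching offset 0, so we return Alias.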
+//
// TODO: The current metadata format doesn't support struct
// fields. For example:
// struct X {
@@ -71,7 +135,6 @@ using namespace llvm;
// achieved by stripping the !tbaa tags from IR, but this option is sometimes
// more convenient.
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
-static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
namespace {
/// TBAANode - This is a simple wrapper around an MDNode which provides a
@@ -168,8 +231,12 @@ namespace {
if (Node->getNumOperands() < 2)
return TBAAStructTypeNode();
- // Special handling for a scalar type node.
+ // Fast path for a scalar type node and a struct type node with a single
+ // field.
if (Node->getNumOperands() <= 3) {
+ uint64_t Cur = Node->getNumOperands() == 2 ? 0 :
+ cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
return TBAAStructTypeNode();
@@ -259,12 +326,21 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
}
+/// Check the first operand of the tbaa tag node. If it is an MDNode, we treat
+/// it as struct-path aware TBAA format; otherwise, we treat it as scalar TBAA
+/// format.
+static bool isStructPathTBAA(const MDNode *MD) {
+ // Anonymous TBAA root starts with a MDNode and dragonegg uses it as
+  // An anonymous TBAA root starts with an MDNode, and dragonegg uses it as
+  // a TBAA tag.
+}
+
/// Aliases - Test whether the type represented by A may alias the
/// type represented by B.
bool
TypeBasedAliasAnalysis::Aliases(const MDNode *A,
const MDNode *B) const {
- if (EnableStructPathTBAA)
+ if (isStructPathTBAA(A))
return PathAliases(A, B);
// Keep track of the root node for A and B.
@@ -397,8 +473,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
- (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
return true;
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -414,8 +490,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
- (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
Min = OnlyReadsMemory;
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
@@ -458,6 +534,25 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
+bool MDNode::isTBAAVtableAccess() const {
+ if (!isStructPathTBAA(this)) {
+ if (getNumOperands() < 1) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ return false;
+ }
+
+ // For struct-path aware TBAA, we use the access type of the tag.
+ if (getNumOperands() < 2) return false;
+ MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
+ if (!Tag) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ return false;
+}
+
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
if (!A || !B)
return NULL;
@@ -466,7 +561,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return A;
// For struct-path aware TBAA, we use the access type of the tag.
- if (EnableStructPathTBAA) {
+ bool StructPath = isStructPathTBAA(A);
+ if (StructPath) {
A = cast_or_null<MDNode>(A->getOperand(1));
if (!A) return 0;
B = cast_or_null<MDNode>(B->getOperand(1));
@@ -499,7 +595,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
--IA;
--IB;
}
- if (!EnableStructPathTBAA)
+ if (!StructPath)
return Ret;
if (!Ret)
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 4591af8..e39ee62 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
@@ -39,8 +40,8 @@ const unsigned MaxDepth = 6;
static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- assert(isa<PointerType>(Ty) && "Expected a pointer type!");
- return TD ? TD->getPointerSizeInBits() : 0;
+
+ return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
}
static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
@@ -629,9 +630,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Value *Index = I->getOperand(i);
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
- if (!TD) return;
- const StructLayout *SL = TD->getStructLayout(STy);
+ if (!TD)
+ return;
+
+ // Handle the case when the index is a vector zeroinitializer.
+ Constant *CIndex = cast<Constant>(Index);
+ if (CIndex->isZeroValue())
+ continue;
+
+ if (CIndex->getType()->isVectorTy())
+ Index = CIndex->getSplatValue();
+
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
uint64_t Offset = SL->getElementOffset(Idx);
TrailZ = std::min<unsigned>(TrailZ,
countTrailingZeros(Offset));
@@ -749,7 +760,6 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
- case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
break;
@@ -1704,20 +1714,24 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// it can be expressed as a base pointer plus a constant offset. Return the
/// base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout *TD) {
+ const DataLayout *DL) {
// Without DataLayout, conservatively assume 64-bit offsets, which is
// the widest we support.
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64;
APInt ByteOffset(BitWidth, 0);
while (1) {
if (Ptr->getType()->isVectorTy())
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt GEPOffset(BitWidth, 0);
- if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset))
- break;
- ByteOffset += GEPOffset;
+ if (DL) {
+ APInt GEPOffset(BitWidth, 0);
+ if (!GEP->accumulateConstantOffset(*DL, GEPOffset))
+ break;
+
+ ByteOffset += GEPOffset;
+ }
+
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
@@ -2050,7 +2064,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
/// isKnownNonNull - Return true if we know that the specified value is never
/// null.
-bool llvm::isKnownNonNull(const Value *V) {
+bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
@@ -2061,5 +2075,10 @@ bool llvm::isKnownNonNull(const Value *V) {
// Global values are not null unless extern weak.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return !GV->hasExternalWeakLinkage();
+
+ // operator new never returns null.
+ if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
+ return true;
+
return false;
}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 48675ac..1e6085b 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -478,12 +478,10 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(private);
KEYWORD(linker_private);
KEYWORD(linker_private_weak);
- KEYWORD(linker_private_weak_def_auto); // FIXME: For backwards compatibility.
KEYWORD(internal);
KEYWORD(available_externally);
KEYWORD(linkonce);
KEYWORD(linkonce_odr);
- KEYWORD(linkonce_odr_auto_hide);
KEYWORD(weak);
KEYWORD(weak_odr);
KEYWORD(appending);
@@ -540,6 +538,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(alignstack);
KEYWORD(inteldialect);
KEYWORD(gc);
+ KEYWORD(prefix);
KEYWORD(ccc);
KEYWORD(fastcc);
@@ -558,6 +557,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(intel_ocl_bicc);
KEYWORD(x86_64_sysvcc);
KEYWORD(x86_64_win64cc);
+ KEYWORD(webkit_jscc);
+ KEYWORD(anyregcc);
KEYWORD(cc);
KEYWORD(c);
@@ -583,6 +584,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noredzone);
KEYWORD(noreturn);
KEYWORD(nounwind);
+ KEYWORD(optnone);
KEYWORD(optsize);
KEYWORD(readnone);
KEYWORD(readonly);
@@ -663,6 +665,7 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(inttoptr, IntToPtr);
INSTKEYWORD(ptrtoint, PtrToInt);
INSTKEYWORD(bitcast, BitCast);
+ INSTKEYWORD(addrspacecast, AddrSpaceCast);
INSTKEYWORD(select, Select);
INSTKEYWORD(va_arg, VAArg);
INSTKEYWORD(ret, Ret);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 62a07f5..3b903cd 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueSymbolTable.h"
@@ -65,6 +66,9 @@ bool LLParser::ValidateEndOfModule() {
ForwardRefInstMetadata.clear();
}
+ for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++)
+ UpgradeInstWithTBAATag(InstsWithTBAATag[I]);
+
// Handle any function attribute group forward references.
for (std::map<Value*, std::vector<unsigned> >::iterator
I = ForwardRefAttrGroups.begin(), E = ForwardRefAttrGroups.end();
@@ -178,6 +182,8 @@ bool LLParser::ValidateEndOfModule() {
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
+ UpgradeDebugInfo(*M);
+
return false;
}
@@ -242,13 +248,11 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::kw_private: // OptionalLinkage
case lltok::kw_linker_private: // OptionalLinkage
case lltok::kw_linker_private_weak: // OptionalLinkage
- case lltok::kw_linker_private_weak_def_auto: // FIXME: backwards compat.
case lltok::kw_internal: // OptionalLinkage
case lltok::kw_weak: // OptionalLinkage
case lltok::kw_weak_odr: // OptionalLinkage
case lltok::kw_linkonce: // OptionalLinkage
case lltok::kw_linkonce_odr: // OptionalLinkage
- case lltok::kw_linkonce_odr_auto_hide: // OptionalLinkage
case lltok::kw_appending: // OptionalLinkage
case lltok::kw_dllexport: // OptionalLinkage
case lltok::kw_common: // OptionalLinkage
@@ -623,18 +627,14 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
unsigned Visibility) {
assert(Lex.getKind() == lltok::kw_alias);
Lex.Lex();
- unsigned Linkage;
LocTy LinkageLoc = Lex.getLoc();
- if (ParseOptionalLinkage(Linkage))
+ unsigned L;
+ if (ParseOptionalLinkage(L))
return true;
- if (Linkage != GlobalValue::ExternalLinkage &&
- Linkage != GlobalValue::WeakAnyLinkage &&
- Linkage != GlobalValue::WeakODRLinkage &&
- Linkage != GlobalValue::InternalLinkage &&
- Linkage != GlobalValue::PrivateLinkage &&
- Linkage != GlobalValue::LinkerPrivateLinkage &&
- Linkage != GlobalValue::LinkerPrivateWeakLinkage)
+ GlobalValue::LinkageTypes Linkage = (GlobalValue::LinkageTypes) L;
+
+ if (!GlobalAlias::isValidLinkage(Linkage))
return Error(LinkageLoc, "invalid linkage type for alias");
Constant *Aliasee;
@@ -922,6 +922,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break;
case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
+ case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break;
case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break;
case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break;
case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
@@ -1180,6 +1181,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_noredzone:
case lltok::kw_noreturn:
case lltok::kw_nounwind:
+ case lltok::kw_optnone:
case lltok::kw_optsize:
case lltok::kw_returns_twice:
case lltok::kw_sanitize_address:
@@ -1238,6 +1240,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_noredzone:
case lltok::kw_noreturn:
case lltok::kw_nounwind:
+ case lltok::kw_optnone:
case lltok::kw_optsize:
case lltok::kw_returns_twice:
case lltok::kw_sanitize_address:
@@ -1269,7 +1272,6 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
/// ::= 'weak_odr'
/// ::= 'linkonce'
/// ::= 'linkonce_odr'
-/// ::= 'linkonce_odr_auto_hide'
/// ::= 'available_externally'
/// ::= 'appending'
/// ::= 'dllexport'
@@ -1291,10 +1293,6 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break;
case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
- case lltok::kw_linkonce_odr_auto_hide:
- case lltok::kw_linker_private_weak_def_auto: // FIXME: For backwards compat.
- Res = GlobalValue::LinkOnceODRAutoHideLinkage;
- break;
case lltok::kw_available_externally:
Res = GlobalValue::AvailableExternallyLinkage;
break;
@@ -1346,6 +1344,8 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
/// ::= 'spir_kernel'
/// ::= 'x86_64_sysvcc'
/// ::= 'x86_64_win64cc'
+/// ::= 'webkit_jscc'
+/// ::= 'anyregcc'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
@@ -1368,6 +1368,8 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break;
case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break;
case lltok::kw_x86_64_win64cc: CC = CallingConv::X86_64_Win64; break;
+ case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break;
+ case lltok::kw_anyregcc: CC = CallingConv::AnyReg; break;
case lltok::kw_cc: {
unsigned ArbitraryCC;
Lex.Lex();
@@ -1424,6 +1426,9 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
}
}
+ if (MDK == LLVMContext::MD_tbaa)
+ InstsWithTBAATag.push_back(Inst);
+
// If this is the end of the list, we're done.
} while (EatIfPresent(lltok::comma));
return false;
@@ -2383,7 +2388,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
Lex.Lex();
ValID Fn, Label;
- LocTy FnLoc, LabelLoc;
if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
ParseValID(Fn) ||
@@ -2413,6 +2417,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
case lltok::kw_fptrunc:
case lltok::kw_fpext:
case lltok::kw_bitcast:
+ case lltok::kw_addrspacecast:
case lltok::kw_uitofp:
case lltok::kw_sitofp:
case lltok::kw_fptoui:
@@ -2919,7 +2924,7 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
/// FunctionHeader
/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
/// OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
-/// OptionalAlign OptGC
+/// OptionalAlign OptGC OptionalPrefix
bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Parse the linkage.
LocTy LinkageLoc = Lex.getLoc();
@@ -2953,7 +2958,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
- case GlobalValue::LinkOnceODRAutoHideLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::DLLExportLinkage:
@@ -2998,6 +3002,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
std::string GC;
bool UnnamedAddr;
LocTy UnnamedAddrLoc;
+ Constant *Prefix = 0;
if (ParseArgumentList(ArgList, isVarArg) ||
ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
@@ -3008,7 +3013,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
ParseStringConstant(Section)) ||
ParseOptionalAlignment(Alignment) ||
(EatIfPresent(lltok::kw_gc) &&
- ParseStringConstant(GC)))
+ ParseStringConstant(GC)) ||
+ (EatIfPresent(lltok::kw_prefix) &&
+ ParseGlobalTypeAndValue(Prefix)))
return true;
if (FuncAttrs.contains(Attribute::Builtin))
@@ -3106,6 +3113,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn->setAlignment(Alignment);
Fn->setSection(Section);
if (!GC.empty()) Fn->setGC(GC.c_str());
+ Fn->setPrefixData(Prefix);
ForwardRefAttrGroups[Fn] = FwdRefAttrGrps;
// Add all of the arguments we parsed to the function.
@@ -3171,7 +3179,6 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// Parse the instructions in this block until we get a terminator.
Instruction *Inst;
- SmallVector<std::pair<unsigned, MDNode *>, 4> MetadataOnInst;
do {
// This instruction may have three possibilities for a name: a) none
// specified, b) name specified "%foo =", c) number specified: "%4 =".
@@ -3299,6 +3306,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_fptrunc:
case lltok::kw_fpext:
case lltok::kw_bitcast:
+ case lltok::kw_addrspacecast:
case lltok::kw_uitofp:
case lltok::kw_sitofp:
case lltok::kw_fptoui:
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 594281e..ded776c 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -107,6 +107,8 @@ namespace llvm {
};
DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
+ SmallVector<Instruction*, 64> InstsWithTBAATag;
+
// Type resolution handling data structures. The location is set when we
// have processed a use of the type but not a definition yet.
StringMap<std::pair<Type*, LocTy> > NamedTypes;
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 9cf4c2c..786d84d 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -38,9 +38,8 @@ namespace lltok {
kw_global, kw_constant,
kw_private, kw_linker_private, kw_linker_private_weak,
- kw_linker_private_weak_def_auto, // FIXME: For backwards compatibility.
kw_internal,
- kw_linkonce, kw_linkonce_odr, kw_linkonce_odr_auto_hide,
+ kw_linkonce, kw_linkonce_odr,
kw_weak, kw_weak_odr, kw_appending,
kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
kw_default, kw_hidden, kw_protected,
@@ -81,6 +80,7 @@ namespace lltok {
kw_alignstack,
kw_inteldialect,
kw_gc,
+ kw_prefix,
kw_c,
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
@@ -91,6 +91,7 @@ namespace lltok {
kw_ptx_kernel, kw_ptx_device,
kw_spir_kernel, kw_spir_func,
kw_x86_64_sysvcc, kw_x86_64_win64cc,
+ kw_webkit_jscc, kw_anyregcc,
// Attributes:
kw_attributes,
@@ -114,6 +115,7 @@ namespace lltok {
kw_noredzone,
kw_noreturn,
kw_nounwind,
+ kw_optnone,
kw_optsize,
kw_readnone,
kw_readonly,
@@ -148,6 +150,7 @@ namespace lltok {
kw_phi, kw_call,
kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp,
kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast,
+ kw_addrspacecast,
kw_select, kw_va_arg,
kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index e6d7b50..ce3b7d1 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
@@ -89,7 +90,6 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
case 12: return GlobalValue::AvailableExternallyLinkage;
case 13: return GlobalValue::LinkerPrivateLinkage;
case 14: return GlobalValue::LinkerPrivateWeakLinkage;
- case 15: return GlobalValue::LinkOnceODRAutoHideLinkage;
}
}
@@ -128,6 +128,7 @@ static int GetDecodedCastOpcode(unsigned Val) {
case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
case bitc::CAST_BITCAST : return Instruction::BitCast;
+ case bitc::CAST_ADDRSPACECAST: return Instruction::AddrSpaceCast;
}
}
static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) {
@@ -450,12 +451,12 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
(EncodedAttrs & 0xffff));
}
-bool BitcodeReader::ParseAttributeBlock() {
+error_code BitcodeReader::ParseAttributeBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
if (!MAttributes.empty())
- return Error("Multiple PARAMATTR blocks found!");
+ return Error(InvalidMultipleBlocks);
SmallVector<uint64_t, 64> Record;
@@ -468,9 +469,9 @@ bool BitcodeReader::ParseAttributeBlock() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Error at end of PARAMATTR block");
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -484,7 +485,7 @@ bool BitcodeReader::ParseAttributeBlock() {
case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...]
// FIXME: Remove in 4.0.
if (Record.size() & 1)
- return Error("Invalid ENTRY record");
+ return Error(InvalidRecord);
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
AttrBuilder B;
@@ -508,131 +509,102 @@ bool BitcodeReader::ParseAttributeBlock() {
}
}
-bool BitcodeReader::ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) {
+// Returns Attribute::None on unrecognized codes.
+static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
switch (Code) {
+ default:
+ return Attribute::None;
case bitc::ATTR_KIND_ALIGNMENT:
- *Kind = Attribute::Alignment;
- return false;
+ return Attribute::Alignment;
case bitc::ATTR_KIND_ALWAYS_INLINE:
- *Kind = Attribute::AlwaysInline;
- return false;
+ return Attribute::AlwaysInline;
case bitc::ATTR_KIND_BUILTIN:
- *Kind = Attribute::Builtin;
- return false;
+ return Attribute::Builtin;
case bitc::ATTR_KIND_BY_VAL:
- *Kind = Attribute::ByVal;
- return false;
+ return Attribute::ByVal;
case bitc::ATTR_KIND_COLD:
- *Kind = Attribute::Cold;
- return false;
+ return Attribute::Cold;
case bitc::ATTR_KIND_INLINE_HINT:
- *Kind = Attribute::InlineHint;
- return false;
+ return Attribute::InlineHint;
case bitc::ATTR_KIND_IN_REG:
- *Kind = Attribute::InReg;
- return false;
+ return Attribute::InReg;
case bitc::ATTR_KIND_MIN_SIZE:
- *Kind = Attribute::MinSize;
- return false;
+ return Attribute::MinSize;
case bitc::ATTR_KIND_NAKED:
- *Kind = Attribute::Naked;
- return false;
+ return Attribute::Naked;
case bitc::ATTR_KIND_NEST:
- *Kind = Attribute::Nest;
- return false;
+ return Attribute::Nest;
case bitc::ATTR_KIND_NO_ALIAS:
- *Kind = Attribute::NoAlias;
- return false;
+ return Attribute::NoAlias;
case bitc::ATTR_KIND_NO_BUILTIN:
- *Kind = Attribute::NoBuiltin;
- return false;
+ return Attribute::NoBuiltin;
case bitc::ATTR_KIND_NO_CAPTURE:
- *Kind = Attribute::NoCapture;
- return false;
+ return Attribute::NoCapture;
case bitc::ATTR_KIND_NO_DUPLICATE:
- *Kind = Attribute::NoDuplicate;
- return false;
+ return Attribute::NoDuplicate;
case bitc::ATTR_KIND_NO_IMPLICIT_FLOAT:
- *Kind = Attribute::NoImplicitFloat;
- return false;
+ return Attribute::NoImplicitFloat;
case bitc::ATTR_KIND_NO_INLINE:
- *Kind = Attribute::NoInline;
- return false;
+ return Attribute::NoInline;
case bitc::ATTR_KIND_NON_LAZY_BIND:
- *Kind = Attribute::NonLazyBind;
- return false;
+ return Attribute::NonLazyBind;
case bitc::ATTR_KIND_NO_RED_ZONE:
- *Kind = Attribute::NoRedZone;
- return false;
+ return Attribute::NoRedZone;
case bitc::ATTR_KIND_NO_RETURN:
- *Kind = Attribute::NoReturn;
- return false;
+ return Attribute::NoReturn;
case bitc::ATTR_KIND_NO_UNWIND:
- *Kind = Attribute::NoUnwind;
- return false;
+ return Attribute::NoUnwind;
case bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE:
- *Kind = Attribute::OptimizeForSize;
- return false;
+ return Attribute::OptimizeForSize;
+ case bitc::ATTR_KIND_OPTIMIZE_NONE:
+ return Attribute::OptimizeNone;
case bitc::ATTR_KIND_READ_NONE:
- *Kind = Attribute::ReadNone;
- return false;
+ return Attribute::ReadNone;
case bitc::ATTR_KIND_READ_ONLY:
- *Kind = Attribute::ReadOnly;
- return false;
+ return Attribute::ReadOnly;
case bitc::ATTR_KIND_RETURNED:
- *Kind = Attribute::Returned;
- return false;
+ return Attribute::Returned;
case bitc::ATTR_KIND_RETURNS_TWICE:
- *Kind = Attribute::ReturnsTwice;
- return false;
+ return Attribute::ReturnsTwice;
case bitc::ATTR_KIND_S_EXT:
- *Kind = Attribute::SExt;
- return false;
+ return Attribute::SExt;
case bitc::ATTR_KIND_STACK_ALIGNMENT:
- *Kind = Attribute::StackAlignment;
- return false;
+ return Attribute::StackAlignment;
case bitc::ATTR_KIND_STACK_PROTECT:
- *Kind = Attribute::StackProtect;
- return false;
+ return Attribute::StackProtect;
case bitc::ATTR_KIND_STACK_PROTECT_REQ:
- *Kind = Attribute::StackProtectReq;
- return false;
+ return Attribute::StackProtectReq;
case bitc::ATTR_KIND_STACK_PROTECT_STRONG:
- *Kind = Attribute::StackProtectStrong;
- return false;
+ return Attribute::StackProtectStrong;
case bitc::ATTR_KIND_STRUCT_RET:
- *Kind = Attribute::StructRet;
- return false;
+ return Attribute::StructRet;
case bitc::ATTR_KIND_SANITIZE_ADDRESS:
- *Kind = Attribute::SanitizeAddress;
- return false;
+ return Attribute::SanitizeAddress;
case bitc::ATTR_KIND_SANITIZE_THREAD:
- *Kind = Attribute::SanitizeThread;
- return false;
+ return Attribute::SanitizeThread;
case bitc::ATTR_KIND_SANITIZE_MEMORY:
- *Kind = Attribute::SanitizeMemory;
- return false;
+ return Attribute::SanitizeMemory;
case bitc::ATTR_KIND_UW_TABLE:
- *Kind = Attribute::UWTable;
- return false;
+ return Attribute::UWTable;
case bitc::ATTR_KIND_Z_EXT:
- *Kind = Attribute::ZExt;
- return false;
- default:
- std::string Buf;
- raw_string_ostream fmt(Buf);
- fmt << "Unknown attribute kind (" << Code << ")";
- fmt.flush();
- return Error(Buf.c_str());
+ return Attribute::ZExt;
}
}
-bool BitcodeReader::ParseAttributeGroupBlock() {
+error_code BitcodeReader::ParseAttrKind(uint64_t Code,
+ Attribute::AttrKind *Kind) {
+ *Kind = GetAttrFromCode(Code);
+ if (*Kind == Attribute::None)
+ return Error(InvalidValue);
+ return error_code::success();
+}
+
+error_code BitcodeReader::ParseAttributeGroupBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
if (!MAttributeGroups.empty())
- return Error("Multiple PARAMATTR_GROUP blocks found!");
+ return Error(InvalidMultipleBlocks);
SmallVector<uint64_t, 64> Record;
@@ -643,9 +615,9 @@ bool BitcodeReader::ParseAttributeGroupBlock() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("Error at end of PARAMATTR_GROUP block");
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -658,7 +630,7 @@ bool BitcodeReader::ParseAttributeGroupBlock() {
break;
case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...]
if (Record.size() < 3)
- return Error("Invalid ENTRY record");
+ return Error(InvalidRecord);
uint64_t GrpID = Record[0];
uint64_t Idx = Record[1]; // Index of the object this attribute refers to.
@@ -667,14 +639,14 @@ bool BitcodeReader::ParseAttributeGroupBlock() {
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
if (Record[i] == 0) { // Enum attribute
Attribute::AttrKind Kind;
- if (ParseAttrKind(Record[++i], &Kind))
- return true;
+ if (error_code EC = ParseAttrKind(Record[++i], &Kind))
+ return EC;
B.addAttribute(Kind);
} else if (Record[i] == 1) { // Align attribute
Attribute::AttrKind Kind;
- if (ParseAttrKind(Record[++i], &Kind))
- return true;
+ if (error_code EC = ParseAttrKind(Record[++i], &Kind))
+ return EC;
if (Kind == Attribute::Alignment)
B.addAlignmentAttr(Record[++i]);
else
@@ -709,16 +681,16 @@ bool BitcodeReader::ParseAttributeGroupBlock() {
}
}
-bool BitcodeReader::ParseTypeTable() {
+error_code BitcodeReader::ParseTypeTable() {
if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
return ParseTypeTableBody();
}
-bool BitcodeReader::ParseTypeTableBody() {
+error_code BitcodeReader::ParseTypeTableBody() {
if (!TypeList.empty())
- return Error("Multiple TYPE_BLOCKs found!");
+ return Error(InvalidMultipleBlocks);
SmallVector<uint64_t, 64> Record;
unsigned NumRecords = 0;
@@ -732,12 +704,11 @@ bool BitcodeReader::ParseTypeTableBody() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- Error("Error in the type table block");
- return true;
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
if (NumRecords != TypeList.size())
- return Error("Invalid type forward reference in TYPE_BLOCK");
- return false;
+ return Error(MalformedBlock);
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -747,12 +718,13 @@ bool BitcodeReader::ParseTypeTableBody() {
Record.clear();
Type *ResultTy = 0;
switch (Stream.readRecord(Entry.ID, Record)) {
- default: return Error("unknown type in type table");
+ default:
+ return Error(InvalidValue);
case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
// TYPE_CODE_NUMENTRY contains a count of the number of types in the
// type list. This allows us to reserve space.
if (Record.size() < 1)
- return Error("Invalid TYPE_CODE_NUMENTRY record");
+ return Error(InvalidRecord);
TypeList.resize(Record[0]);
continue;
case bitc::TYPE_CODE_VOID: // VOID
@@ -787,19 +759,20 @@ bool BitcodeReader::ParseTypeTableBody() {
break;
case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
if (Record.size() < 1)
- return Error("Invalid Integer type record");
+ return Error(InvalidRecord);
ResultTy = IntegerType::get(Context, Record[0]);
break;
case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
// [pointee type, address space]
if (Record.size() < 1)
- return Error("Invalid POINTER type record");
+ return Error(InvalidRecord);
unsigned AddressSpace = 0;
if (Record.size() == 2)
AddressSpace = Record[1];
ResultTy = getTypeByID(Record[0]);
- if (ResultTy == 0) return Error("invalid element type in pointer type");
+ if (ResultTy == 0)
+ return Error(InvalidType);
ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
}
@@ -807,7 +780,7 @@ bool BitcodeReader::ParseTypeTableBody() {
// FIXME: attrid is dead, remove it in LLVM 4.0
// FUNCTION: [vararg, attrid, retty, paramty x N]
if (Record.size() < 3)
- return Error("Invalid FUNCTION type record");
+ return Error(InvalidRecord);
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 3, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -818,7 +791,7 @@ bool BitcodeReader::ParseTypeTableBody() {
ResultTy = getTypeByID(Record[2]);
if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
- return Error("invalid type in function type");
+ return Error(InvalidType);
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
@@ -826,7 +799,7 @@ bool BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_FUNCTION: {
// FUNCTION: [vararg, retty, paramty x N]
if (Record.size() < 2)
- return Error("Invalid FUNCTION type record");
+ return Error(InvalidRecord);
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -837,14 +810,14 @@ bool BitcodeReader::ParseTypeTableBody() {
ResultTy = getTypeByID(Record[1]);
if (ResultTy == 0 || ArgTys.size() < Record.size()-2)
- return Error("invalid type in function type");
+ return Error(InvalidType);
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
}
case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
- return Error("Invalid STRUCT type record");
+ return Error(InvalidRecord);
SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -853,21 +826,21 @@ bool BitcodeReader::ParseTypeTableBody() {
break;
}
if (EltTys.size() != Record.size()-1)
- return Error("invalid type in struct type");
+ return Error(InvalidType);
ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N]
if (ConvertToString(Record, 0, TypeName))
- return Error("Invalid STRUCT_NAME record");
+ return Error(InvalidRecord);
continue;
case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
- return Error("Invalid STRUCT type record");
+ return Error(InvalidRecord);
if (NumRecords >= TypeList.size())
- return Error("invalid TYPE table");
+ return Error(InvalidTYPETable);
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
@@ -886,17 +859,17 @@ bool BitcodeReader::ParseTypeTableBody() {
break;
}
if (EltTys.size() != Record.size()-1)
- return Error("invalid STRUCT type record");
+ return Error(InvalidRecord);
Res->setBody(EltTys, Record[0]);
ResultTy = Res;
break;
}
case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
if (Record.size() != 1)
- return Error("Invalid OPAQUE type record");
+ return Error(InvalidRecord);
if (NumRecords >= TypeList.size())
- return Error("invalid TYPE table");
+ return Error(InvalidTYPETable);
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
@@ -911,33 +884,33 @@ bool BitcodeReader::ParseTypeTableBody() {
}
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
- return Error("Invalid ARRAY type record");
+ return Error(InvalidRecord);
if ((ResultTy = getTypeByID(Record[1])))
ResultTy = ArrayType::get(ResultTy, Record[0]);
else
- return Error("Invalid ARRAY type element");
+ return Error(InvalidType);
break;
case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
if (Record.size() < 2)
- return Error("Invalid VECTOR type record");
+ return Error(InvalidRecord);
if ((ResultTy = getTypeByID(Record[1])))
ResultTy = VectorType::get(ResultTy, Record[0]);
else
- return Error("Invalid ARRAY type element");
+ return Error(InvalidType);
break;
}
if (NumRecords >= TypeList.size())
- return Error("invalid TYPE table");
+ return Error(InvalidTYPETable);
assert(ResultTy && "Didn't read a type?");
assert(TypeList[NumRecords] == 0 && "Already read type?");
TypeList[NumRecords++] = ResultTy;
}
}
-bool BitcodeReader::ParseValueSymbolTable() {
+error_code BitcodeReader::ParseValueSymbolTable() {
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -949,9 +922,9 @@ bool BitcodeReader::ParseValueSymbolTable() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("malformed value symbol table block");
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -964,10 +937,10 @@ bool BitcodeReader::ParseValueSymbolTable() {
break;
case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
if (ConvertToString(Record, 1, ValueName))
- return Error("Invalid VST_ENTRY record");
+ return Error(InvalidRecord);
unsigned ValueID = Record[0];
if (ValueID >= ValueList.size())
- return Error("Invalid Value ID in VST_ENTRY record");
+ return Error(InvalidRecord);
Value *V = ValueList[ValueID];
V->setName(StringRef(ValueName.data(), ValueName.size()));
@@ -976,10 +949,10 @@ bool BitcodeReader::ParseValueSymbolTable() {
}
case bitc::VST_CODE_BBENTRY: {
if (ConvertToString(Record, 1, ValueName))
- return Error("Invalid VST_BBENTRY record");
+ return Error(InvalidRecord);
BasicBlock *BB = getBasicBlock(Record[0]);
if (BB == 0)
- return Error("Invalid BB ID in VST_BBENTRY record");
+ return Error(InvalidRecord);
BB->setName(StringRef(ValueName.data(), ValueName.size()));
ValueName.clear();
@@ -989,11 +962,11 @@ bool BitcodeReader::ParseValueSymbolTable() {
}
}
-bool BitcodeReader::ParseMetadata() {
+error_code BitcodeReader::ParseMetadata() {
unsigned NextMDValueNo = MDValueList.size();
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -1004,10 +977,9 @@ bool BitcodeReader::ParseMetadata() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- Error("malformed metadata block");
- return true;
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1036,7 +1008,7 @@ bool BitcodeReader::ParseMetadata() {
for (unsigned i = 0; i != Size; ++i) {
MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i]));
if (MD == 0)
- return Error("Malformed metadata record");
+ return Error(InvalidRecord);
NMD->addOperand(MD);
}
break;
@@ -1046,13 +1018,14 @@ bool BitcodeReader::ParseMetadata() {
// fall-through
case bitc::METADATA_NODE: {
if (Record.size() % 2 == 1)
- return Error("Invalid METADATA_NODE record");
+ return Error(InvalidRecord);
unsigned Size = Record.size();
SmallVector<Value*, 8> Elts;
for (unsigned i = 0; i != Size; i += 2) {
Type *Ty = getTypeByID(Record[i]);
- if (!Ty) return Error("Invalid METADATA_NODE record");
+ if (!Ty)
+ return Error(InvalidRecord);
if (Ty->isMetadataTy())
Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
else if (!Ty->isVoidTy())
@@ -1073,14 +1046,14 @@ bool BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_KIND: {
if (Record.size() < 2)
- return Error("Invalid METADATA_KIND record");
+ return Error(InvalidRecord);
unsigned Kind = Record[0];
SmallString<8> Name(Record.begin()+1, Record.end());
unsigned NewKind = TheModule->getMDKindID(Name.str());
if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
- return Error("Conflicting METADATA_KIND records");
+ return Error(ConflictingMETADATA_KINDRecords);
break;
}
}
@@ -1100,12 +1073,14 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
/// values and aliases that we can.
-bool BitcodeReader::ResolveGlobalAndAliasInits() {
+error_code BitcodeReader::ResolveGlobalAndAliasInits() {
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
+ std::vector<std::pair<Function*, unsigned> > FunctionPrefixWorklist;
GlobalInitWorklist.swap(GlobalInits);
AliasInitWorklist.swap(AliasInits);
+ FunctionPrefixWorklist.swap(FunctionPrefixes);
while (!GlobalInitWorklist.empty()) {
unsigned ValID = GlobalInitWorklist.back().second;
@@ -1116,7 +1091,7 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() {
if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
GlobalInitWorklist.back().first->setInitializer(C);
else
- return Error("Global variable initializer is not a constant!");
+ return Error(ExpectedConstant);
}
GlobalInitWorklist.pop_back();
}
@@ -1129,11 +1104,25 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() {
if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
AliasInitWorklist.back().first->setAliasee(C);
else
- return Error("Alias initializer is not a constant!");
+ return Error(ExpectedConstant);
}
AliasInitWorklist.pop_back();
}
- return false;
+
+ while (!FunctionPrefixWorklist.empty()) {
+ unsigned ValID = FunctionPrefixWorklist.back().second;
+ if (ValID >= ValueList.size()) {
+ FunctionPrefixes.push_back(FunctionPrefixWorklist.back());
+ } else {
+ if (Constant *C = dyn_cast<Constant>(ValueList[ValID]))
+ FunctionPrefixWorklist.back().first->setPrefixData(C);
+ else
+ return Error(ExpectedConstant);
+ }
+ FunctionPrefixWorklist.pop_back();
+ }
+
+ return error_code::success();
}
static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
@@ -1144,9 +1133,9 @@ static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
return APInt(TypeBits, Words);
}
-bool BitcodeReader::ParseConstants() {
+error_code BitcodeReader::ParseConstants() {
if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -1159,15 +1148,15 @@ bool BitcodeReader::ParseConstants() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("malformed block record in AST file");
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
if (NextCstNo != ValueList.size())
- return Error("Invalid constant reference!");
+ return Error(InvalidConstantReference);
// Once all the constants have been read, go through and resolve forward
// references.
ValueList.ResolveConstantForwardRefs();
- return false;
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1184,9 +1173,9 @@ bool BitcodeReader::ParseConstants() {
break;
case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
if (Record.empty())
- return Error("Malformed CST_SETTYPE record");
+ return Error(InvalidRecord);
if (Record[0] >= TypeList.size())
- return Error("Invalid Type ID in CST_SETTYPE record");
+ return Error(InvalidRecord);
CurTy = TypeList[Record[0]];
continue; // Skip the ValueList manipulation.
case bitc::CST_CODE_NULL: // NULL
@@ -1194,12 +1183,12 @@ bool BitcodeReader::ParseConstants() {
break;
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
if (!CurTy->isIntegerTy() || Record.empty())
- return Error("Invalid CST_INTEGER record");
+ return Error(InvalidRecord);
V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0]));
break;
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
if (!CurTy->isIntegerTy() || Record.empty())
- return Error("Invalid WIDE_INTEGER record");
+ return Error(InvalidRecord);
APInt VInt = ReadWideAPInt(Record,
cast<IntegerType>(CurTy)->getBitWidth());
@@ -1209,7 +1198,7 @@ bool BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
if (Record.empty())
- return Error("Invalid FLOAT record");
+ return Error(InvalidRecord);
if (CurTy->isHalfTy())
V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf,
APInt(16, (uint16_t)Record[0])));
@@ -1239,7 +1228,7 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
if (Record.empty())
- return Error("Invalid CST_AGGREGATE record");
+ return Error(InvalidRecord);
unsigned Size = Record.size();
SmallVector<Constant*, 16> Elts;
@@ -1267,7 +1256,7 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_STRING: // STRING: [values]
case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
if (Record.empty())
- return Error("Invalid CST_STRING record");
+ return Error(InvalidRecord);
SmallString<16> Elts(Record.begin(), Record.end());
V = ConstantDataArray::getString(Context, Elts,
@@ -1276,7 +1265,7 @@ bool BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_DATA: {// DATA: [n x value]
if (Record.empty())
- return Error("Invalid CST_DATA record");
+ return Error(InvalidRecord);
Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
unsigned Size = Record.size();
@@ -1321,13 +1310,14 @@ bool BitcodeReader::ParseConstants() {
else
V = ConstantDataArray::get(Context, Elts);
} else {
- return Error("Unknown element type in CE_DATA");
+ return Error(InvalidTypeForValue);
}
break;
}
case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
- if (Record.size() < 3) return Error("Invalid CE_BINOP record");
+ if (Record.size() < 3)
+ return Error(InvalidRecord);
int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown binop.
@@ -1357,25 +1347,30 @@ bool BitcodeReader::ParseConstants() {
break;
}
case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
- if (Record.size() < 3) return Error("Invalid CE_CAST record");
+ if (Record.size() < 3)
+ return Error(InvalidRecord);
int Opc = GetDecodedCastOpcode(Record[0]);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown cast.
} else {
Type *OpTy = getTypeByID(Record[1]);
- if (!OpTy) return Error("Invalid CE_CAST record");
+ if (!OpTy)
+ return Error(InvalidRecord);
Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
- V = ConstantExpr::getCast(Opc, Op, CurTy);
+ V = UpgradeBitCastExpr(Opc, Op, CurTy);
+ if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy);
}
break;
}
case bitc::CST_CODE_CE_INBOUNDS_GEP:
case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
- if (Record.size() & 1) return Error("Invalid CE_GEP record");
+ if (Record.size() & 1)
+ return Error(InvalidRecord);
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
Type *ElTy = getTypeByID(Record[i]);
- if (!ElTy) return Error("Invalid CE_GEP record");
+ if (!ElTy)
+ return Error(InvalidRecord);
Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
}
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
@@ -1384,19 +1379,31 @@ bool BitcodeReader::ParseConstants() {
bitc::CST_CODE_CE_INBOUNDS_GEP);
break;
}
- case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
- if (Record.size() < 3) return Error("Invalid CE_SELECT record");
- V = ConstantExpr::getSelect(
- ValueList.getConstantFwdRef(Record[0],
- Type::getInt1Ty(Context)),
- ValueList.getConstantFwdRef(Record[1],CurTy),
- ValueList.getConstantFwdRef(Record[2],CurTy));
+ case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#]
+ if (Record.size() < 3)
+ return Error(InvalidRecord);
+
+ Type *SelectorTy = Type::getInt1Ty(Context);
+
+ // If CurTy is a vector of length n, then Record[0] must be a <n x i1>
+ // vector. Otherwise, it must be a single bit.
+ if (VectorType *VTy = dyn_cast<VectorType>(CurTy))
+ SelectorTy = VectorType::get(Type::getInt1Ty(Context),
+ VTy->getNumElements());
+
+ V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
+ SelectorTy),
+ ValueList.getConstantFwdRef(Record[1],CurTy),
+ ValueList.getConstantFwdRef(Record[2],CurTy));
break;
+ }
case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
- if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
+ if (Record.size() < 3)
+ return Error(InvalidRecord);
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
- if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
+ if (OpTy == 0)
+ return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2],
Type::getInt32Ty(Context));
@@ -1406,7 +1413,7 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || OpTy == 0)
- return Error("Invalid CE_INSERTELT record");
+ return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
OpTy->getElementType());
@@ -1418,7 +1425,7 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || OpTy == 0)
- return Error("Invalid CE_SHUFFLEVEC record");
+ return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
@@ -1432,7 +1439,7 @@ bool BitcodeReader::ParseConstants() {
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (Record.size() < 4 || RTy == 0 || OpTy == 0)
- return Error("Invalid CE_SHUFVEC_EX record");
+ return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
@@ -1442,9 +1449,11 @@ bool BitcodeReader::ParseConstants() {
break;
}
case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
- if (Record.size() < 4) return Error("Invalid CE_CMP record");
+ if (Record.size() < 4)
+ return Error(InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
- if (OpTy == 0) return Error("Invalid CE_CMP record");
+ if (OpTy == 0)
+ return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
@@ -1457,16 +1466,17 @@ bool BitcodeReader::ParseConstants() {
// This maintains backward compatibility, pre-asm dialect keywords.
// FIXME: Remove with the 4.0 release.
case bitc::CST_CODE_INLINEASM_OLD: {
- if (Record.size() < 2) return Error("Invalid INLINEASM record");
+ if (Record.size() < 2)
+ return Error(InvalidRecord);
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[0] & 1;
bool IsAlignStack = Record[0] >> 1;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
- return Error("Invalid INLINEASM record");
+ return Error(InvalidRecord);
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
- return Error("Invalid INLINEASM record");
+ return Error(InvalidRecord);
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
@@ -1480,17 +1490,18 @@ bool BitcodeReader::ParseConstants() {
// This version adds support for the asm dialect keywords (e.g.,
// inteldialect).
case bitc::CST_CODE_INLINEASM: {
- if (Record.size() < 2) return Error("Invalid INLINEASM record");
+ if (Record.size() < 2)
+ return Error(InvalidRecord);
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[0] & 1;
bool IsAlignStack = (Record[0] >> 1) & 1;
unsigned AsmDialect = Record[0] >> 2;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
- return Error("Invalid INLINEASM record");
+ return Error(InvalidRecord);
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
- return Error("Invalid INLINEASM record");
+ return Error(InvalidRecord);
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
@@ -1503,12 +1514,15 @@ bool BitcodeReader::ParseConstants() {
break;
}
case bitc::CST_CODE_BLOCKADDRESS:{
- if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record");
+ if (Record.size() < 3)
+ return Error(InvalidRecord);
Type *FnTy = getTypeByID(Record[0]);
- if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record");
+ if (FnTy == 0)
+ return Error(InvalidRecord);
Function *Fn =
dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
- if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record");
+ if (Fn == 0)
+ return Error(InvalidRecord);
// If the function is already parsed we can insert the block address right
// away.
@@ -1516,7 +1530,7 @@ bool BitcodeReader::ParseConstants() {
Function::iterator BBI = Fn->begin(), BBE = Fn->end();
for (size_t I = 0, E = Record[2]; I != E; ++I) {
if (BBI == BBE)
- return Error("Invalid blockaddress block #");
+ return Error(InvalidID);
++BBI;
}
V = BlockAddress::get(Fn, BBI);
@@ -1539,9 +1553,9 @@ bool BitcodeReader::ParseConstants() {
}
}
-bool BitcodeReader::ParseUseLists() {
+error_code BitcodeReader::ParseUseLists() {
if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -1552,9 +1566,9 @@ bool BitcodeReader::ParseUseLists() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("malformed use list block");
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1568,7 +1582,7 @@ bool BitcodeReader::ParseUseLists() {
case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
unsigned RecordLength = Record.size();
if (RecordLength < 1)
- return Error ("Invalid UseList reader!");
+ return Error(InvalidRecord);
UseListRecords.push_back(Record);
break;
}
@@ -1579,10 +1593,10 @@ bool BitcodeReader::ParseUseLists() {
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
-bool BitcodeReader::RememberAndSkipFunctionBody() {
+error_code BitcodeReader::RememberAndSkipFunctionBody() {
// Get the function we are talking about.
if (FunctionsWithBodies.empty())
- return Error("Insufficient function protos");
+ return Error(InsufficientFunctionProtos);
Function *Fn = FunctionsWithBodies.back();
FunctionsWithBodies.pop_back();
@@ -1593,15 +1607,15 @@ bool BitcodeReader::RememberAndSkipFunctionBody() {
// Skip over the function block for now.
if (Stream.SkipBlock())
- return Error("Malformed block record");
- return false;
+ return Error(InvalidRecord);
+ return error_code::success();
}
-bool BitcodeReader::GlobalCleanup() {
+error_code BitcodeReader::GlobalCleanup() {
// Patch the initializers for globals and aliases up.
ResolveGlobalAndAliasInits();
if (!GlobalInits.empty() || !AliasInits.empty())
- return Error("Malformed global initializer set");
+ return Error(MalformedGlobalInitializerSet);
// Look for intrinsic functions which need to be upgraded at some point
for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
@@ -1620,14 +1634,14 @@ bool BitcodeReader::GlobalCleanup() {
// want lazy deserialization.
std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
- return false;
+ return error_code::success();
}
-bool BitcodeReader::ParseModule(bool Resume) {
+error_code BitcodeReader::ParseModule(bool Resume) {
if (Resume)
Stream.JumpToBit(NextUnreadBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
std::vector<std::string> SectionTable;
@@ -1639,8 +1653,7 @@ bool BitcodeReader::ParseModule(bool Resume) {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- Error("malformed module block");
- return true;
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
return GlobalCleanup();
@@ -1648,49 +1661,51 @@ bool BitcodeReader::ParseModule(bool Resume) {
switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
- return Error("Malformed block record");
+ return Error(InvalidRecord);
break;
case bitc::BLOCKINFO_BLOCK_ID:
if (Stream.ReadBlockInfoBlock())
- return Error("Malformed BlockInfoBlock");
+ return Error(MalformedBlock);
break;
case bitc::PARAMATTR_BLOCK_ID:
- if (ParseAttributeBlock())
- return true;
+ if (error_code EC = ParseAttributeBlock())
+ return EC;
break;
case bitc::PARAMATTR_GROUP_BLOCK_ID:
- if (ParseAttributeGroupBlock())
- return true;
+ if (error_code EC = ParseAttributeGroupBlock())
+ return EC;
break;
case bitc::TYPE_BLOCK_ID_NEW:
- if (ParseTypeTable())
- return true;
+ if (error_code EC = ParseTypeTable())
+ return EC;
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (ParseValueSymbolTable())
- return true;
+ if (error_code EC = ParseValueSymbolTable())
+ return EC;
SeenValueSymbolTable = true;
break;
case bitc::CONSTANTS_BLOCK_ID:
- if (ParseConstants() || ResolveGlobalAndAliasInits())
- return true;
+ if (error_code EC = ParseConstants())
+ return EC;
+ if (error_code EC = ResolveGlobalAndAliasInits())
+ return EC;
break;
case bitc::METADATA_BLOCK_ID:
- if (ParseMetadata())
- return true;
+ if (error_code EC = ParseMetadata())
+ return EC;
break;
case bitc::FUNCTION_BLOCK_ID:
// If this is the first function body we've seen, reverse the
// FunctionsWithBodies list.
if (!SeenFirstFunctionBody) {
std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
- if (GlobalCleanup())
- return true;
+ if (error_code EC = GlobalCleanup())
+ return EC;
SeenFirstFunctionBody = true;
}
- if (RememberAndSkipFunctionBody())
- return true;
+ if (error_code EC = RememberAndSkipFunctionBody())
+ return EC;
// For streaming bitcode, suspend parsing when we reach the function
// bodies. Subsequent materialization calls will resume it when
// necessary. For streaming, the function bodies must be at the end of
@@ -1699,12 +1714,12 @@ bool BitcodeReader::ParseModule(bool Resume) {
// just finish the parse now.
if (LazyStreamer && SeenValueSymbolTable) {
NextUnreadBit = Stream.GetCurrentBitNo();
- return false;
+ return error_code::success();
}
break;
case bitc::USELIST_BLOCK_ID:
- if (ParseUseLists())
- return true;
+ if (error_code EC = ParseUseLists())
+ return EC;
break;
}
continue;
@@ -1720,11 +1735,12 @@ bool BitcodeReader::ParseModule(bool Resume) {
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
if (Record.size() < 1)
- return Error("Malformed MODULE_CODE_VERSION");
+ return Error(InvalidRecord);
// Only version #0 and #1 are supported so far.
unsigned module_version = Record[0];
switch (module_version) {
- default: return Error("Unknown bitstream version!");
+ default:
+ return Error(InvalidValue);
case 0:
UseRelativeIDs = false;
break;
@@ -1737,21 +1753,21 @@ bool BitcodeReader::ParseModule(bool Resume) {
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error("Invalid MODULE_CODE_TRIPLE record");
+ return Error(InvalidRecord);
TheModule->setTargetTriple(S);
break;
}
case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error("Invalid MODULE_CODE_DATALAYOUT record");
+ return Error(InvalidRecord);
TheModule->setDataLayout(S);
break;
}
case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error("Invalid MODULE_CODE_ASM record");
+ return Error(InvalidRecord);
TheModule->setModuleInlineAsm(S);
break;
}
@@ -1759,21 +1775,21 @@ bool BitcodeReader::ParseModule(bool Resume) {
// FIXME: Remove in 4.0.
std::string S;
if (ConvertToString(Record, 0, S))
- return Error("Invalid MODULE_CODE_DEPLIB record");
+ return Error(InvalidRecord);
// Ignore value.
break;
}
case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error("Invalid MODULE_CODE_SECTIONNAME record");
+ return Error(InvalidRecord);
SectionTable.push_back(S);
break;
}
case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error("Invalid MODULE_CODE_GCNAME record");
+ return Error(InvalidRecord);
GCTable.push_back(S);
break;
}
@@ -1782,11 +1798,12 @@ bool BitcodeReader::ParseModule(bool Resume) {
// unnamed_addr]
case bitc::MODULE_CODE_GLOBALVAR: {
if (Record.size() < 6)
- return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ return Error(InvalidRecord);
Type *Ty = getTypeByID(Record[0]);
- if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record");
+ if (!Ty)
+ return Error(InvalidRecord);
if (!Ty->isPointerTy())
- return Error("Global not a pointer type!");
+ return Error(InvalidTypeForValue);
unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
Ty = cast<PointerType>(Ty)->getElementType();
@@ -1796,7 +1813,7 @@ bool BitcodeReader::ParseModule(bool Resume) {
std::string Section;
if (Record[5]) {
if (Record[5]-1 >= SectionTable.size())
- return Error("Invalid section ID");
+ return Error(InvalidID);
Section = SectionTable[Record[5]-1];
}
GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
@@ -1835,15 +1852,16 @@ bool BitcodeReader::ParseModule(bool Resume) {
// alignment, section, visibility, gc, unnamed_addr]
case bitc::MODULE_CODE_FUNCTION: {
if (Record.size() < 8)
- return Error("Invalid MODULE_CODE_FUNCTION record");
+ return Error(InvalidRecord);
Type *Ty = getTypeByID(Record[0]);
- if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record");
+ if (!Ty)
+ return Error(InvalidRecord);
if (!Ty->isPointerTy())
- return Error("Function not a pointer type!");
+ return Error(InvalidTypeForValue);
FunctionType *FTy =
dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
if (!FTy)
- return Error("Function not a pointer to function type!");
+ return Error(InvalidTypeForValue);
Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
"", TheModule);
@@ -1856,19 +1874,21 @@ bool BitcodeReader::ParseModule(bool Resume) {
Func->setAlignment((1 << Record[5]) >> 1);
if (Record[6]) {
if (Record[6]-1 >= SectionTable.size())
- return Error("Invalid section ID");
+ return Error(InvalidID);
Func->setSection(SectionTable[Record[6]-1]);
}
Func->setVisibility(GetDecodedVisibility(Record[7]));
if (Record.size() > 8 && Record[8]) {
      if (Record[8]-1 >= GCTable.size())
- return Error("Invalid GC ID");
+ return Error(InvalidID);
Func->setGC(GCTable[Record[8]-1].c_str());
}
bool UnnamedAddr = false;
if (Record.size() > 9)
UnnamedAddr = Record[9];
Func->setUnnamedAddr(UnnamedAddr);
+ if (Record.size() > 10 && Record[10] != 0)
+ FunctionPrefixes.push_back(std::make_pair(Func, Record[10]-1));
ValueList.push_back(Func);
// If this is a function with a body, remember the prototype we are
@@ -1883,11 +1903,12 @@ bool BitcodeReader::ParseModule(bool Resume) {
// ALIAS: [alias type, aliasee val#, linkage, visibility]
case bitc::MODULE_CODE_ALIAS: {
if (Record.size() < 3)
- return Error("Invalid MODULE_ALIAS record");
+ return Error(InvalidRecord);
Type *Ty = getTypeByID(Record[0]);
- if (!Ty) return Error("Invalid MODULE_ALIAS record");
+ if (!Ty)
+ return Error(InvalidRecord);
if (!Ty->isPointerTy())
- return Error("Function not a pointer type!");
+ return Error(InvalidTypeForValue);
GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
"", 0, TheModule);
@@ -1902,7 +1923,7 @@ bool BitcodeReader::ParseModule(bool Resume) {
case bitc::MODULE_CODE_PURGEVALS:
// Trim down the value list to the specified size.
if (Record.size() < 1 || Record[0] > ValueList.size())
- return Error("Invalid MODULE_PURGEVALS record");
+ return Error(InvalidRecord);
ValueList.shrinkTo(Record[0]);
break;
}
@@ -1910,10 +1931,11 @@ bool BitcodeReader::ParseModule(bool Resume) {
}
}
-bool BitcodeReader::ParseBitcodeInto(Module *M) {
+error_code BitcodeReader::ParseBitcodeInto(Module *M) {
TheModule = 0;
- if (InitStream()) return true;
+ if (error_code EC = InitStream())
+ return EC;
// Sniff for the signature.
if (Stream.Read(8) != 'B' ||
@@ -1922,42 +1944,42 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) {
Stream.Read(4) != 0xC ||
Stream.Read(4) != 0xE ||
Stream.Read(4) != 0xD)
- return Error("Invalid bitcode signature");
+ return Error(InvalidBitcodeSignature);
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
while (1) {
if (Stream.AtEndOfStream())
- return false;
+ return error_code::success();
BitstreamEntry Entry =
Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
switch (Entry.Kind) {
case BitstreamEntry::Error:
- Error("malformed module file");
- return true;
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::SubBlock:
switch (Entry.ID) {
case bitc::BLOCKINFO_BLOCK_ID:
if (Stream.ReadBlockInfoBlock())
- return Error("Malformed BlockInfoBlock");
+ return Error(MalformedBlock);
break;
case bitc::MODULE_BLOCK_ID:
// Reject multiple MODULE_BLOCK's in a single bitstream.
if (TheModule)
- return Error("Multiple MODULE_BLOCKs in same stream");
+ return Error(InvalidMultipleBlocks);
TheModule = M;
- if (ParseModule(false))
- return true;
- if (LazyStreamer) return false;
+ if (error_code EC = ParseModule(false))
+ return EC;
+ if (LazyStreamer)
+ return error_code::success();
break;
default:
if (Stream.SkipBlock())
- return Error("Malformed block record");
+ return Error(InvalidRecord);
break;
}
continue;
@@ -1970,16 +1992,16 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) {
if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 &&
Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a &&
Stream.AtEndOfStream())
- return false;
+ return error_code::success();
- return Error("Invalid record at top-level");
+ return Error(InvalidRecord);
}
}
}
-bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
+error_code BitcodeReader::ParseModuleTriple(std::string &Triple) {
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -1990,9 +2012,9 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("malformed module block");
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -2004,7 +2026,7 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error("Invalid MODULE_CODE_TRIPLE record");
+ return Error(InvalidRecord);
Triple = S;
break;
}
@@ -2013,8 +2035,9 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
}
}
-bool BitcodeReader::ParseTriple(std::string &Triple) {
- if (InitStream()) return true;
+error_code BitcodeReader::ParseTriple(std::string &Triple) {
+ if (error_code EC = InitStream())
+ return EC;
// Sniff for the signature.
if (Stream.Read(8) != 'B' ||
@@ -2023,7 +2046,7 @@ bool BitcodeReader::ParseTriple(std::string &Triple) {
Stream.Read(4) != 0xC ||
Stream.Read(4) != 0xE ||
Stream.Read(4) != 0xD)
- return Error("Invalid bitcode signature");
+ return Error(InvalidBitcodeSignature);
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
@@ -2032,20 +2055,17 @@ bool BitcodeReader::ParseTriple(std::string &Triple) {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- Error("malformed module file");
- return true;
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::SubBlock:
if (Entry.ID == bitc::MODULE_BLOCK_ID)
return ParseModuleTriple(Triple);
// Ignore other sub-blocks.
- if (Stream.SkipBlock()) {
- Error("malformed block record in AST file");
- return true;
- }
+ if (Stream.SkipBlock())
+ return Error(MalformedBlock);
continue;
case BitstreamEntry::Record:
@@ -2056,9 +2076,9 @@ bool BitcodeReader::ParseTriple(std::string &Triple) {
}
/// ParseMetadataAttachment - Parse metadata attachments.
-bool BitcodeReader::ParseMetadataAttachment() {
+error_code BitcodeReader::ParseMetadataAttachment() {
if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
while (1) {
@@ -2067,9 +2087,9 @@ bool BitcodeReader::ParseMetadataAttachment() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error("malformed metadata block");
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return false;
+ return error_code::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -2083,16 +2103,18 @@ bool BitcodeReader::ParseMetadataAttachment() {
case bitc::METADATA_ATTACHMENT: {
unsigned RecordLength = Record.size();
if (Record.empty() || (RecordLength - 1) % 2 == 1)
- return Error ("Invalid METADATA_ATTACHMENT reader!");
+ return Error(InvalidRecord);
Instruction *Inst = InstructionList[Record[0]];
for (unsigned i = 1; i != RecordLength; i = i+2) {
unsigned Kind = Record[i];
DenseMap<unsigned, unsigned>::iterator I =
MDKindMap.find(Kind);
if (I == MDKindMap.end())
- return Error("Invalid metadata kind ID");
+ return Error(InvalidID);
Value *Node = MDValueList.getValueFwdRef(Record[i+1]);
Inst->setMetadata(I->second, cast<MDNode>(Node));
+ if (I->second == LLVMContext::MD_tbaa)
+ InstsWithTBAATag.push_back(Inst);
}
break;
}
@@ -2101,9 +2123,9 @@ bool BitcodeReader::ParseMetadataAttachment() {
}
/// ParseFunctionBody - Lazily parse the specified function body block.
-bool BitcodeReader::ParseFunctionBody(Function *F) {
+error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
- return Error("Malformed block record");
+ return Error(InvalidRecord);
InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
@@ -2126,7 +2148,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error("Bitcode error in function block");
+ return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
goto OutOfRecordLoop;
@@ -2134,20 +2156,24 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
- return Error("Malformed block record");
+ return Error(InvalidRecord);
break;
case bitc::CONSTANTS_BLOCK_ID:
- if (ParseConstants()) return true;
+ if (error_code EC = ParseConstants())
+ return EC;
NextValueNo = ValueList.size();
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (ParseValueSymbolTable()) return true;
+ if (error_code EC = ParseValueSymbolTable())
+ return EC;
break;
case bitc::METADATA_ATTACHMENT_ID:
- if (ParseMetadataAttachment()) return true;
+ if (error_code EC = ParseMetadataAttachment())
+ return EC;
break;
case bitc::METADATA_BLOCK_ID:
- if (ParseMetadata()) return true;
+ if (error_code EC = ParseMetadata())
+ return EC;
break;
}
continue;
@@ -2163,10 +2189,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default: // Default behavior: reject
- return Error("Unknown instruction");
+ return Error(InvalidValue);
case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks]
if (Record.size() < 1 || Record[0] == 0)
- return Error("Invalid DECLAREBLOCKS record");
+ return Error(InvalidRecord);
// Create all the basic blocks for the function.
FunctionBBs.resize(Record[0]);
for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
@@ -2186,7 +2212,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
!FunctionBBs[CurBBNo-1]->empty())
I = &FunctionBBs[CurBBNo-1]->back();
- if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record");
+ if (I == 0)
+ return Error(InvalidRecord);
I->setDebugLoc(LastLoc);
I = 0;
continue;
@@ -2199,7 +2226,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
!FunctionBBs[CurBBNo-1]->empty())
I = &FunctionBBs[CurBBNo-1]->back();
if (I == 0 || Record.size() < 4)
- return Error("Invalid FUNC_CODE_DEBUG_LOC record");
+ return Error(InvalidRecord);
unsigned Line = Record[0], Col = Record[1];
unsigned ScopeID = Record[2], IAID = Record[3];
@@ -2219,10 +2246,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 > Record.size())
- return Error("Invalid BINOP record");
+ return Error(InvalidRecord);
int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
- if (Opc == -1) return Error("Invalid BINOP record");
+ if (Opc == -1)
+ return Error(InvalidRecord);
I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
InstructionList.push_back(I);
if (OpNum < Record.size()) {
@@ -2264,13 +2292,21 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+2 != Record.size())
- return Error("Invalid CAST record");
+ return Error(InvalidRecord);
Type *ResTy = getTypeByID(Record[OpNum]);
int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
if (Opc == -1 || ResTy == 0)
- return Error("Invalid CAST record");
- I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ return Error(InvalidRecord);
+ Instruction *Temp = 0;
+ if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) {
+ if (Temp) {
+ InstructionList.push_back(Temp);
+ CurBB->getInstList().push_back(Temp);
+ }
+ } else {
+ I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ }
InstructionList.push_back(I);
break;
}
@@ -2279,13 +2315,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *BasePtr;
if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
- return Error("Invalid GEP record");
+ return Error(InvalidRecord);
SmallVector<Value*, 16> GEPIdx;
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid GEP record");
+ return Error(InvalidRecord);
GEPIdx.push_back(Op);
}
@@ -2301,14 +2337,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Agg;
if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
- return Error("Invalid EXTRACTVAL record");
+ return Error(InvalidRecord);
SmallVector<unsigned, 4> EXTRACTVALIdx;
for (unsigned RecSize = Record.size();
OpNum != RecSize; ++OpNum) {
uint64_t Index = Record[OpNum];
if ((unsigned)Index != Index)
- return Error("Invalid EXTRACTVAL index");
+ return Error(InvalidValue);
EXTRACTVALIdx.push_back((unsigned)Index);
}
@@ -2322,17 +2358,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Agg;
if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
- return Error("Invalid INSERTVAL record");
+ return Error(InvalidRecord);
Value *Val;
if (getValueTypePair(Record, OpNum, NextValueNo, Val))
- return Error("Invalid INSERTVAL record");
+ return Error(InvalidRecord);
SmallVector<unsigned, 4> INSERTVALIdx;
for (unsigned RecSize = Record.size();
OpNum != RecSize; ++OpNum) {
uint64_t Index = Record[OpNum];
if ((unsigned)Index != Index)
- return Error("Invalid INSERTVAL index");
+ return Error(InvalidValue);
INSERTVALIdx.push_back((unsigned)Index);
}
@@ -2349,7 +2385,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
- return Error("Invalid SELECT record");
+ return Error(InvalidRecord);
I = SelectInst::Create(Cond, TrueVal, FalseVal);
InstructionList.push_back(I);
@@ -2364,18 +2400,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
getValueTypePair(Record, OpNum, NextValueNo, Cond))
- return Error("Invalid SELECT record");
+ return Error(InvalidRecord);
// select condition can be either i1 or [N x i1]
if (VectorType* vector_type =
dyn_cast<VectorType>(Cond->getType())) {
// expect <n x i1>
if (vector_type->getElementType() != Type::getInt1Ty(Context))
- return Error("Invalid SELECT condition type");
+ return Error(InvalidTypeForValue);
} else {
// expect i1
if (Cond->getType() != Type::getInt1Ty(Context))
- return Error("Invalid SELECT condition type");
+ return Error(InvalidTypeForValue);
}
I = SelectInst::Create(Cond, TrueVal, FalseVal);
@@ -2388,7 +2424,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *Vec, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
- return Error("Invalid EXTRACTELT record");
+ return Error(InvalidRecord);
I = ExtractElementInst::Create(Vec, Idx);
InstructionList.push_back(I);
break;
@@ -2401,7 +2437,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
- return Error("Invalid INSERTELT record");
+ return Error(InvalidRecord);
I = InsertElementInst::Create(Vec, Elt, Idx);
InstructionList.push_back(I);
break;
@@ -2412,10 +2448,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *Vec1, *Vec2, *Mask;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
- return Error("Invalid SHUFFLEVEC record");
+ return Error(InvalidRecord);
if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
- return Error("Invalid SHUFFLEVEC record");
+ return Error(InvalidRecord);
I = new ShuffleVectorInst(Vec1, Vec2, Mask);
InstructionList.push_back(I);
break;
@@ -2433,7 +2469,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 != Record.size())
- return Error("Invalid CMP record");
+ return Error(InvalidRecord);
if (LHS->getType()->isFPOrFPVectorTy())
I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
@@ -2455,9 +2491,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Op = NULL;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid RET record");
+ return Error(InvalidRecord);
if (OpNum != Record.size())
- return Error("Invalid RET record");
+ return Error(InvalidRecord);
I = ReturnInst::Create(Context, Op);
InstructionList.push_back(I);
@@ -2465,10 +2501,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#]
if (Record.size() != 1 && Record.size() != 3)
- return Error("Invalid BR record");
+ return Error(InvalidRecord);
BasicBlock *TrueDest = getBasicBlock(Record[0]);
if (TrueDest == 0)
- return Error("Invalid BR record");
+ return Error(InvalidRecord);
if (Record.size() == 1) {
I = BranchInst::Create(TrueDest);
@@ -2479,7 +2515,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *Cond = getValue(Record, 2, NextValueNo,
Type::getInt1Ty(Context));
if (FalseDest == 0 || Cond == 0)
- return Error("Invalid BR record");
+ return Error(InvalidRecord);
I = BranchInst::Create(TrueDest, FalseDest, Cond);
InstructionList.push_back(I);
}
@@ -2488,7 +2524,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
// Check magic
if ((Record[0] >> 16) == SWITCH_INST_MAGIC) {
- // New SwitchInst format with case ranges.
+ // "New" SwitchInst format with case ranges. The changes to write this
+ // format were reverted but we still recognize bitcode that uses it.
+ // Hopefully someday we will have support for case ranges and can use
+ // this format again.
Type *OpTy = getTypeByID(Record[1]);
unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth();
@@ -2496,7 +2535,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *Cond = getValue(Record, 2, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[3]);
if (OpTy == 0 || Cond == 0 || Default == 0)
- return Error("Invalid SWITCH record");
+ return Error(InvalidRecord);
unsigned NumCases = Record[4];
@@ -2505,7 +2544,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned CurIdx = 5;
for (unsigned i = 0; i != NumCases; ++i) {
- IntegersSubsetToBB CaseBuilder;
+ SmallVector<ConstantInt*, 1> CaseVals;
unsigned NumItems = Record[CurIdx++];
for (unsigned ci = 0; ci != NumItems; ++ci) {
bool isSingleNumber = Record[CurIdx++];
@@ -2525,20 +2564,22 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
APInt High =
ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords),
ValueBitWidth);
-
- CaseBuilder.add(IntItem::fromType(OpTy, Low),
- IntItem::fromType(OpTy, High));
CurIdx += ActiveWords;
+
+ // FIXME: It is not clear whether values in the range should be
+ // compared as signed or unsigned values. The partially
+ // implemented changes that used this format in the past used
+ // unsigned comparisons.
+ for ( ; Low.ule(High); ++Low)
+ CaseVals.push_back(ConstantInt::get(Context, Low));
} else
- CaseBuilder.add(IntItem::fromType(OpTy, Low));
+ CaseVals.push_back(ConstantInt::get(Context, Low));
}
BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]);
- IntegersSubset Case = CaseBuilder.getCase();
- SI->addCase(Case, DestBB);
+ for (SmallVector<ConstantInt*, 1>::iterator cvi = CaseVals.begin(),
+ cve = CaseVals.end(); cvi != cve; ++cvi)
+ SI->addCase(*cvi, DestBB);
}
- uint16_t Hash = SI->hash();
- if (Hash != (Record[0] & 0xFFFF))
- return Error("Invalid SWITCH record");
I = SI;
break;
}
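The loop in this hunk flattens each inclusive [Low, High] range from the retired case-ranges encoding into one switch case per value, comparing unsigned as the FIXME notes. A small standalone sketch of that expansion, using a hypothetical helper that is not part of the patch and assuming the usual LLVM APInt API:

#include "llvm/ADT/APInt.h"
#include <vector>
using namespace llvm;

// Expand an inclusive [Low, High] range into individual case values,
// e.g. [2, 5] produces 2, 3, 4, 5. Comparison is unsigned (ule), matching
// the reader code above.
static void expandCaseRange(APInt Low, const APInt &High,
                            std::vector<APInt> &CaseVals) {
  for (; Low.ule(High); ++Low)
    CaseVals.push_back(Low);
}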
@@ -2546,12 +2587,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
// Old SwitchInst format without case ranges.
if (Record.size() < 3 || (Record.size() & 1) == 0)
- return Error("Invalid SWITCH record");
+ return Error(InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
Value *Cond = getValue(Record, 1, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[2]);
if (OpTy == 0 || Cond == 0 || Default == 0)
- return Error("Invalid SWITCH record");
+ return Error(InvalidRecord);
unsigned NumCases = (Record.size()-3)/2;
SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
InstructionList.push_back(SI);
@@ -2561,7 +2602,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
if (CaseVal == 0 || DestBB == 0) {
delete SI;
- return Error("Invalid SWITCH record!");
+ return Error(InvalidRecord);
}
SI->addCase(CaseVal, DestBB);
}
@@ -2570,11 +2611,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
if (Record.size() < 2)
- return Error("Invalid INDIRECTBR record");
+ return Error(InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
Value *Address = getValue(Record, 1, NextValueNo, OpTy);
if (OpTy == 0 || Address == 0)
- return Error("Invalid INDIRECTBR record");
+ return Error(InvalidRecord);
unsigned NumDests = Record.size()-2;
IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests);
InstructionList.push_back(IBI);
@@ -2583,7 +2624,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
IBI->addDestination(DestBB);
} else {
delete IBI;
- return Error("Invalid INDIRECTBR record!");
+ return Error(InvalidRecord);
}
}
I = IBI;
@@ -2592,7 +2633,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_INVOKE: {
// INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
- if (Record.size() < 4) return Error("Invalid INVOKE record");
+ if (Record.size() < 4)
+ return Error(InvalidRecord);
AttributeSet PAL = getAttributes(Record[0]);
unsigned CCInfo = Record[1];
BasicBlock *NormalBB = getBasicBlock(Record[2]);
@@ -2601,7 +2643,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 4;
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
- return Error("Invalid INVOKE record");
+ return Error(InvalidRecord);
PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
FunctionType *FTy = !CalleeTy ? 0 :
@@ -2610,24 +2652,25 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
// Check that the right number of fixed parameters are here.
if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 ||
Record.size() < OpNum+FTy->getNumParams())
- return Error("Invalid INVOKE record");
+ return Error(InvalidRecord);
SmallVector<Value*, 16> Ops;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
Ops.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
- if (Ops.back() == 0) return Error("Invalid INVOKE record");
+ if (Ops.back() == 0)
+ return Error(InvalidRecord);
}
if (!FTy->isVarArg()) {
if (Record.size() != OpNum)
- return Error("Invalid INVOKE record");
+ return Error(InvalidRecord);
} else {
// Read type/value pairs for varargs params.
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid INVOKE record");
+ return Error(InvalidRecord);
Ops.push_back(Op);
}
}
@@ -2643,7 +2686,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned Idx = 0;
Value *Val = 0;
if (getValueTypePair(Record, Idx, NextValueNo, Val))
- return Error("Invalid RESUME record");
+ return Error(InvalidRecord);
I = ResumeInst::Create(Val);
InstructionList.push_back(I);
break;
@@ -2654,9 +2697,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
break;
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
if (Record.size() < 1 || ((Record.size()-1)&1))
- return Error("Invalid PHI record");
+ return Error(InvalidRecord);
Type *Ty = getTypeByID(Record[0]);
- if (!Ty) return Error("Invalid PHI record");
+ if (!Ty)
+ return Error(InvalidRecord);
PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
InstructionList.push_back(PN);
@@ -2671,7 +2715,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
else
V = getValue(Record, 1+i, NextValueNo, Ty);
BasicBlock *BB = getBasicBlock(Record[2+i]);
- if (!V || !BB) return Error("Invalid PHI record");
+ if (!V || !BB)
+ return Error(InvalidRecord);
PN->addIncoming(V, BB);
}
I = PN;
@@ -2682,12 +2727,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
// LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?]
unsigned Idx = 0;
if (Record.size() < 4)
- return Error("Invalid LANDINGPAD record");
+ return Error(InvalidRecord);
Type *Ty = getTypeByID(Record[Idx++]);
- if (!Ty) return Error("Invalid LANDINGPAD record");
+ if (!Ty)
+ return Error(InvalidRecord);
Value *PersFn = 0;
if (getValueTypePair(Record, Idx, NextValueNo, PersFn))
- return Error("Invalid LANDINGPAD record");
+ return Error(InvalidRecord);
bool IsCleanup = !!Record[Idx++];
unsigned NumClauses = Record[Idx++];
@@ -2700,7 +2746,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, Idx, NextValueNo, Val)) {
delete LP;
- return Error("Invalid LANDINGPAD record");
+ return Error(InvalidRecord);
}
assert((CT != LandingPadInst::Catch ||
@@ -2719,13 +2765,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
if (Record.size() != 4)
- return Error("Invalid ALLOCA record");
+ return Error(InvalidRecord);
PointerType *Ty =
dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
unsigned Align = Record[3];
- if (!Ty || !Size) return Error("Invalid ALLOCA record");
+ if (!Ty || !Size)
+ return Error(InvalidRecord);
I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
InstructionList.push_back(I);
break;
@@ -2735,7 +2782,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+2 != Record.size())
- return Error("Invalid LOAD record");
+ return Error(InvalidRecord);
I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
InstructionList.push_back(I);
@@ -2747,15 +2794,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+4 != Record.size())
- return Error("Invalid LOADATOMIC record");
+ return Error(InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Release ||
Ordering == AcquireRelease)
- return Error("Invalid LOADATOMIC record");
+ return Error(InvalidRecord);
if (Ordering != NotAtomic && Record[OpNum] == 0)
- return Error("Invalid LOADATOMIC record");
+ return Error(InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1,
@@ -2770,7 +2817,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+2 != Record.size())
- return Error("Invalid STORE record");
+ return Error(InvalidRecord);
I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
InstructionList.push_back(I);
@@ -2784,15 +2831,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
- return Error("Invalid STOREATOMIC record");
+ return Error(InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Acquire ||
Ordering == AcquireRelease)
- return Error("Invalid STOREATOMIC record");
+ return Error(InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
if (Ordering != NotAtomic && Record[OpNum] == 0)
- return Error("Invalid STOREATOMIC record");
+ return Error(InvalidRecord);
I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1,
Ordering, SynchScope);
@@ -2809,10 +2856,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), New) ||
OpNum+3 != Record.size())
- return Error("Invalid CMPXCHG record");
+ return Error(InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+1]);
if (Ordering == NotAtomic || Ordering == Unordered)
- return Error("Invalid CMPXCHG record");
+ return Error(InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]);
I = new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope);
cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
@@ -2827,14 +2874,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
- return Error("Invalid ATOMICRMW record");
+ return Error(InvalidRecord);
AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]);
if (Operation < AtomicRMWInst::FIRST_BINOP ||
Operation > AtomicRMWInst::LAST_BINOP)
- return Error("Invalid ATOMICRMW record");
+ return Error(InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Unordered)
- return Error("Invalid ATOMICRMW record");
+ return Error(InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope);
cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
@@ -2843,11 +2890,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope]
if (2 != Record.size())
- return Error("Invalid FENCE record");
+ return Error(InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[0]);
if (Ordering == NotAtomic || Ordering == Unordered ||
Ordering == Monotonic)
- return Error("Invalid FENCE record");
+ return Error(InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]);
I = new FenceInst(Context, Ordering, SynchScope);
InstructionList.push_back(I);
@@ -2856,7 +2903,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_CALL: {
// CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
- return Error("Invalid CALL record");
+ return Error(InvalidRecord);
AttributeSet PAL = getAttributes(Record[0]);
unsigned CCInfo = Record[1];
@@ -2864,13 +2911,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 2;
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
- return Error("Invalid CALL record");
+ return Error(InvalidRecord);
PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
FunctionType *FTy = 0;
if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
- return Error("Invalid CALL record");
+ return Error(InvalidRecord);
SmallVector<Value*, 16> Args;
// Read the fixed params.
@@ -2880,18 +2927,19 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
else
Args.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
- if (Args.back() == 0) return Error("Invalid CALL record");
+ if (Args.back() == 0)
+ return Error(InvalidRecord);
}
// Read type/value pairs for varargs params.
if (!FTy->isVarArg()) {
if (OpNum != Record.size())
- return Error("Invalid CALL record");
+ return Error(InvalidRecord);
} else {
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid CALL record");
+ return Error(InvalidRecord);
Args.push_back(Op);
}
}
@@ -2906,12 +2954,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
if (Record.size() < 3)
- return Error("Invalid VAARG record");
+ return Error(InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
Value *Op = getValue(Record, 1, NextValueNo, OpTy);
Type *ResTy = getTypeByID(Record[2]);
if (!OpTy || !Op || !ResTy)
- return Error("Invalid VAARG record");
+ return Error(InvalidRecord);
I = new VAArgInst(Op, ResTy);
InstructionList.push_back(I);
break;
@@ -2922,7 +2970,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
// this file.
if (CurBB == 0) {
delete I;
- return Error("Invalid instruction with no BB");
+ return Error(InvalidInstructionWithNoBB);
}
CurBB->getInstList().push_back(I);
@@ -2949,7 +2997,7 @@ OutOfRecordLoop:
delete A;
}
}
- return Error("Never resolved value found in function!");
+ return Error(NeverResolvedValueFoundInFunction);
}
}
@@ -2965,7 +3013,7 @@ OutOfRecordLoop:
for (unsigned i = 0, e = RefList.size(); i != e; ++i) {
unsigned BlockIdx = RefList[i].first;
if (BlockIdx >= FunctionBBs.size())
- return Error("Invalid blockaddress block #");
+ return Error(InvalidID);
GlobalVariable *FwdRef = RefList[i].second;
FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx]));
@@ -2979,20 +3027,21 @@ OutOfRecordLoop:
ValueList.shrinkTo(ModuleValueListSize);
MDValueList.shrinkTo(ModuleMDValueListSize);
std::vector<BasicBlock*>().swap(FunctionBBs);
- return false;
+ return error_code::success();
}
-/// FindFunctionInStream - Find the function body in the bitcode stream
-bool BitcodeReader::FindFunctionInStream(Function *F,
+/// Find the function body in the bitcode stream
+error_code BitcodeReader::FindFunctionInStream(Function *F,
DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) {
while (DeferredFunctionInfoIterator->second == 0) {
if (Stream.AtEndOfStream())
- return Error("Could not find Function in stream");
+ return Error(CouldNotFindFunctionInStream);
// ParseModule will parse the next body in the stream and set its
// position in the DeferredFunctionInfo map.
- if (ParseModule(true)) return true;
+ if (error_code EC = ParseModule(true))
+ return EC;
}
- return false;
+ return error_code::success();
}
//===----------------------------------------------------------------------===//
@@ -3008,25 +3057,25 @@ bool BitcodeReader::isMaterializable(const GlobalValue *GV) const {
return false;
}
-bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+error_code BitcodeReader::Materialize(GlobalValue *GV) {
Function *F = dyn_cast<Function>(GV);
// If it's not a function or is already material, ignore the request.
- if (!F || !F->isMaterializable()) return false;
+ if (!F || !F->isMaterializable())
+ return error_code::success();
DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
// If its position is recorded as 0, its body is somewhere in the stream
// but we haven't seen it yet.
- if (DFII->second == 0)
- if (LazyStreamer && FindFunctionInStream(F, DFII)) return true;
+ if (DFII->second == 0 && LazyStreamer)
+ if (error_code EC = FindFunctionInStream(F, DFII))
+ return EC;
// Move the bit stream to the saved position of the deferred function body.
Stream.JumpToBit(DFII->second);
- if (ParseFunctionBody(F)) {
- if (ErrInfo) *ErrInfo = ErrorString;
- return true;
- }
+ if (error_code EC = ParseFunctionBody(F))
+ return EC;
// Upgrade any old intrinsic calls in the function.
for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
@@ -3040,7 +3089,7 @@ bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) {
}
}
- return false;
+ return error_code::success();
}
bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
@@ -3063,17 +3112,18 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) {
}
-bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
+error_code BitcodeReader::MaterializeModule(Module *M) {
assert(M == TheModule &&
"Can only Materialize the Module this BitcodeReader is attached to.");
// Iterate over the module, deserializing any functions that are still on
// disk.
for (Module::iterator F = TheModule->begin(), E = TheModule->end();
- F != E; ++F)
- if (F->isMaterializable() &&
- Materialize(F, ErrInfo))
- return true;
-
+ F != E; ++F) {
+ if (F->isMaterializable()) {
+ if (error_code EC = Materialize(F))
+ return EC;
+ }
+ }
// At this point, if there are any function bodies, the current bit is
// pointing to the END_BLOCK record after them. Now make sure the rest
// of the bits in the module have been read.
@@ -3099,38 +3149,43 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
}
std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
- return false;
+ for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++)
+ UpgradeInstWithTBAATag(InstsWithTBAATag[I]);
+
+ UpgradeDebugInfo(*M);
+ return error_code::success();
}
-bool BitcodeReader::InitStream() {
- if (LazyStreamer) return InitLazyStream();
+error_code BitcodeReader::InitStream() {
+ if (LazyStreamer)
+ return InitLazyStream();
return InitStreamFromBuffer();
}
-bool BitcodeReader::InitStreamFromBuffer() {
+error_code BitcodeReader::InitStreamFromBuffer() {
const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart();
const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
if (Buffer->getBufferSize() & 3) {
if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
- return Error("Invalid bitcode signature");
+ return Error(InvalidBitcodeSignature);
else
- return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ return Error(BitcodeStreamInvalidSize);
}
// If we have a wrapper header, parse it and ignore the non-bc file contents.
// The magic number is 0x0B17C0DE stored in little endian.
if (isBitcodeWrapper(BufPtr, BufEnd))
if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
- return Error("Invalid bitcode wrapper header");
+ return Error(InvalidBitcodeWrapperHeader);
StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
Stream.init(*StreamFile);
- return false;
+ return error_code::success();
}
-bool BitcodeReader::InitLazyStream() {
+error_code BitcodeReader::InitLazyStream() {
// Check and strip off the bitcode wrapper; BitstreamReader expects never to
// see it.
StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer);
@@ -3139,10 +3194,10 @@ bool BitcodeReader::InitLazyStream() {
unsigned char buf[16];
if (Bytes->readBytes(0, 16, buf) == -1)
- return Error("Bitcode stream must be at least 16 bytes in length");
+ return Error(BitcodeStreamInvalidSize);
if (!isBitcode(buf, buf + 16))
- return Error("Invalid bitcode signature");
+ return Error(InvalidBitcodeSignature);
if (isBitcodeWrapper(buf, buf + 4)) {
const unsigned char *bitcodeStart = buf;
@@ -3151,7 +3206,64 @@ bool BitcodeReader::InitLazyStream() {
Bytes->dropLeadingBytes(bitcodeStart - buf);
Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart);
}
- return false;
+ return error_code::success();
+}
+
+namespace {
+class BitcodeErrorCategoryType : public _do_message {
+ const char *name() const LLVM_OVERRIDE {
+ return "llvm.bitcode";
+ }
+ std::string message(int IE) const LLVM_OVERRIDE {
+ BitcodeReader::ErrorType E = static_cast<BitcodeReader::ErrorType>(IE);
+ switch (E) {
+ case BitcodeReader::BitcodeStreamInvalidSize:
+ return "Bitcode stream length should be >= 16 bytes and a multiple of 4";
+ case BitcodeReader::ConflictingMETADATA_KINDRecords:
+ return "Conflicting METADATA_KIND records";
+ case BitcodeReader::CouldNotFindFunctionInStream:
+ return "Could not find function in stream";
+ case BitcodeReader::ExpectedConstant:
+ return "Expected a constant";
+ case BitcodeReader::InsufficientFunctionProtos:
+ return "Insufficient function protos";
+ case BitcodeReader::InvalidBitcodeSignature:
+ return "Invalid bitcode signature";
+ case BitcodeReader::InvalidBitcodeWrapperHeader:
+ return "Invalid bitcode wrapper header";
+ case BitcodeReader::InvalidConstantReference:
+      return "Invalid constant reference";
+ case BitcodeReader::InvalidID:
+ return "Invalid ID";
+ case BitcodeReader::InvalidInstructionWithNoBB:
+ return "Invalid instruction with no BB";
+ case BitcodeReader::InvalidRecord:
+ return "Invalid record";
+ case BitcodeReader::InvalidTypeForValue:
+ return "Invalid type for value";
+ case BitcodeReader::InvalidTYPETable:
+ return "Invalid TYPE table";
+ case BitcodeReader::InvalidType:
+ return "Invalid type";
+ case BitcodeReader::MalformedBlock:
+ return "Malformed block";
+ case BitcodeReader::MalformedGlobalInitializerSet:
+ return "Malformed global initializer set";
+ case BitcodeReader::InvalidMultipleBlocks:
+ return "Invalid multiple blocks";
+ case BitcodeReader::NeverResolvedValueFoundInFunction:
+ return "Never resolved value found in function";
+ case BitcodeReader::InvalidValue:
+ return "Invalid value";
+ }
+ llvm_unreachable("Unknown error type!");
+ }
+};
+}
+
+const error_category &BitcodeReader::BitcodeErrorCategory() {
+ static BitcodeErrorCategoryType O;
+ return O;
}
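The category added here is what turns the raw enum values into human-readable messages when EC.message() is called, as the getLazyBitcodeModule/getStreamedBitcodeModule callers further down do. A minimal standalone sketch of the same pattern using C++11 <system_error>; the patch itself derives from LLVM's _do_message adaptor in llvm/Support/system_error.h rather than std::error_category:

#include <string>
#include <system_error>

namespace {
class DemoErrorCategory : public std::error_category {
  const char *name() const noexcept override { return "demo.bitcode"; }
  std::string message(int IE) const override {
    switch (IE) {
    case 1: return "Invalid record";
    case 2: return "Malformed block";
    }
    return "Unknown error";
  }
};
}

// One static instance per category; an error_code stores only the integer
// value plus a pointer to this category object.
static const std::error_category &demoCategory() {
  static DemoErrorCategory Category;
  return Category;
}

static std::error_code makeDemoError(int E) {
  return std::error_code(E, demoCategory());
}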
//===----------------------------------------------------------------------===//
@@ -3166,9 +3278,9 @@ Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
Module *M = new Module(Buffer->getBufferIdentifier(), Context);
BitcodeReader *R = new BitcodeReader(Buffer, Context);
M->setMaterializer(R);
- if (R->ParseBitcodeInto(M)) {
+ if (error_code EC = R->ParseBitcodeInto(M)) {
if (ErrMsg)
- *ErrMsg = R->getErrorString();
+ *ErrMsg = EC.message();
delete M; // Also deletes R.
return 0;
@@ -3189,9 +3301,9 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name,
Module *M = new Module(name, Context);
BitcodeReader *R = new BitcodeReader(streamer, Context);
M->setMaterializer(R);
- if (R->ParseBitcodeInto(M)) {
+ if (error_code EC = R->ParseBitcodeInto(M)) {
if (ErrMsg)
- *ErrMsg = R->getErrorString();
+ *ErrMsg = EC.message();
delete M; // Also deletes R.
return 0;
}
@@ -3230,9 +3342,9 @@ std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer,
R->setBufferOwned(false);
std::string Triple("");
- if (R->ParseTriple(Triple))
+ if (error_code EC = R->ParseTriple(Triple))
if (ErrMsg)
- *ErrMsg = R->getErrorString();
+ *ErrMsg = EC.message();
delete R;
return Triple;
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index b095447..c5d345b 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -21,6 +21,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/system_error.h"
#include "llvm/Support/ValueHandle.h"
#include <vector>
@@ -132,8 +133,6 @@ class BitcodeReader : public GVMaterializer {
uint64_t NextUnreadBit;
bool SeenValueSymbolTable;
- const char *ErrorString;
-
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
@@ -142,6 +141,9 @@ class BitcodeReader : public GVMaterializer {
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+ std::vector<std::pair<Function*, unsigned> > FunctionPrefixes;
+
+ SmallVector<Instruction*, 64> InstsWithTBAATag;
/// MAttributes - The set of attributes by index. Index zero in the
/// file is for null, and is thus not represented here. As such all indices
@@ -191,17 +193,46 @@ class BitcodeReader : public GVMaterializer {
/// not need this flag.
bool UseRelativeIDs;
+ static const error_category &BitcodeErrorCategory();
+
public:
+ enum ErrorType {
+ BitcodeStreamInvalidSize,
+ ConflictingMETADATA_KINDRecords,
+ CouldNotFindFunctionInStream,
+ ExpectedConstant,
+ InsufficientFunctionProtos,
+ InvalidBitcodeSignature,
+ InvalidBitcodeWrapperHeader,
+ InvalidConstantReference,
+ InvalidID, // A read identifier is not found in the table it should be in.
+ InvalidInstructionWithNoBB,
+ InvalidRecord, // A read record doesn't have the expected size or structure
+ InvalidTypeForValue, // Type read OK, but is invalid for its use
+ InvalidTYPETable,
+ InvalidType, // We were unable to read a type
+ MalformedBlock, // We are unable to advance in the stream.
+ MalformedGlobalInitializerSet,
+ InvalidMultipleBlocks, // We found multiple blocks of a kind that should
+ // have only one
+ NeverResolvedValueFoundInFunction,
+ InvalidValue // Invalid version, inst number, attr number, etc
+ };
+
+ error_code Error(ErrorType E) {
+ return error_code(E, BitcodeErrorCategory());
+ }
+
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
- ErrorString(0), ValueList(C), MDValueList(C),
+ ValueList(C), MDValueList(C),
SeenFirstFunctionBody(false), UseRelativeIDs(false) {
}
explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(0), BufferOwned(false),
LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
- ErrorString(0), ValueList(C), MDValueList(C),
+ ValueList(C), MDValueList(C),
SeenFirstFunctionBody(false), UseRelativeIDs(false) {
}
~BitcodeReader() {
@@ -218,23 +249,17 @@ public:
virtual bool isMaterializable(const GlobalValue *GV) const;
virtual bool isDematerializable(const GlobalValue *GV) const;
- virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
- virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0);
+ virtual error_code Materialize(GlobalValue *GV);
+ virtual error_code MaterializeModule(Module *M);
virtual void Dematerialize(GlobalValue *GV);
- bool Error(const char *Str) {
- ErrorString = Str;
- return true;
- }
- const char *getErrorString() const { return ErrorString; }
-
/// @brief Main interface to parsing a bitcode buffer.
  /// @returns an error_code describing the failure, if any.
- bool ParseBitcodeInto(Module *M);
+ error_code ParseBitcodeInto(Module *M);
/// @brief Cheap mechanism to just extract module triple
  /// @returns an error_code describing the failure, if any.
- bool ParseTriple(std::string &Triple);
+ error_code ParseTriple(std::string &Triple);
static uint64_t decodeSignRotatedValue(uint64_t V);
@@ -321,27 +346,27 @@ private:
return getFnValueByID(ValNo, Ty);
}
- bool ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
- bool ParseModule(bool Resume);
- bool ParseAttributeBlock();
- bool ParseAttributeGroupBlock();
- bool ParseTypeTable();
- bool ParseTypeTableBody();
-
- bool ParseValueSymbolTable();
- bool ParseConstants();
- bool RememberAndSkipFunctionBody();
- bool ParseFunctionBody(Function *F);
- bool GlobalCleanup();
- bool ResolveGlobalAndAliasInits();
- bool ParseMetadata();
- bool ParseMetadataAttachment();
- bool ParseModuleTriple(std::string &Triple);
- bool ParseUseLists();
- bool InitStream();
- bool InitStreamFromBuffer();
- bool InitLazyStream();
- bool FindFunctionInStream(Function *F,
+ error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
+ error_code ParseModule(bool Resume);
+ error_code ParseAttributeBlock();
+ error_code ParseAttributeGroupBlock();
+ error_code ParseTypeTable();
+ error_code ParseTypeTableBody();
+
+ error_code ParseValueSymbolTable();
+ error_code ParseConstants();
+ error_code RememberAndSkipFunctionBody();
+ error_code ParseFunctionBody(Function *F);
+ error_code GlobalCleanup();
+ error_code ResolveGlobalAndAliasInits();
+ error_code ParseMetadata();
+ error_code ParseMetadataAttachment();
+ error_code ParseModuleTriple(std::string &Triple);
+ error_code ParseUseLists();
+ error_code InitStream();
+ error_code InitStreamFromBuffer();
+ error_code InitLazyStream();
+ error_code FindFunctionInStream(Function *F,
DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator);
};
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 311c233..4cfc6bd 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -60,10 +60,7 @@ enum {
FUNCTION_INST_CAST_ABBREV,
FUNCTION_INST_RET_VOID_ABBREV,
FUNCTION_INST_RET_VAL_ABBREV,
- FUNCTION_INST_UNREACHABLE_ABBREV,
-
- // SwitchInst Magic
- SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
+ FUNCTION_INST_UNREACHABLE_ABBREV
};
static unsigned GetEncodedCastOpcode(unsigned Opcode) {
@@ -81,6 +78,7 @@ static unsigned GetEncodedCastOpcode(unsigned Opcode) {
case Instruction::PtrToInt: return bitc::CAST_PTRTOINT;
case Instruction::IntToPtr: return bitc::CAST_INTTOPTR;
case Instruction::BitCast : return bitc::CAST_BITCAST;
+ case Instruction::AddrSpaceCast: return bitc::CAST_ADDRSPACECAST;
}
}
@@ -205,6 +203,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NO_UNWIND;
case Attribute::OptimizeForSize:
return bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE;
+ case Attribute::OptimizeNone:
+ return bitc::ATTR_KIND_OPTIMIZE_NONE;
case Attribute::ReadNone:
return bitc::ATTR_KIND_READ_NONE;
case Attribute::ReadOnly:
@@ -490,7 +490,6 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) {
case GlobalValue::AvailableExternallyLinkage: return 12;
case GlobalValue::LinkerPrivateLinkage: return 13;
case GlobalValue::LinkerPrivateWeakLinkage: return 14;
- case GlobalValue::LinkOnceODRAutoHideLinkage: return 15;
}
llvm_unreachable("Invalid linkage");
}
@@ -607,7 +606,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// GLOBALVAR: [type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
- // unnamed_addr]
+ // unnamed_addr, externally_initialized]
Vals.push_back(VE.getTypeID(GV->getType()));
Vals.push_back(GV->isConstant());
Vals.push_back(GV->isDeclaration() ? 0 :
@@ -633,7 +632,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the function proto information.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
// FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
- // section, visibility, gc, unnamed_addr]
+ // section, visibility, gc, unnamed_addr, prefix]
Vals.push_back(VE.getTypeID(F->getType()));
Vals.push_back(F->getCallingConv());
Vals.push_back(F->isDeclaration());
@@ -644,6 +643,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(getEncodedVisibility(F));
Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
Vals.push_back(F->hasUnnamedAddr());
+ Vals.push_back(F->hasPrefixData() ? (VE.getValueID(F->getPrefixData()) + 1)
+ : 0);
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
@@ -863,34 +864,6 @@ static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) {
Vals.push_back((-V << 1) | 1);
}
-static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals,
- unsigned &Code, unsigned &AbbrevToUse, const APInt &Val,
- bool EmitSizeForWideNumbers = false
- ) {
- if (Val.getBitWidth() <= 64) {
- uint64_t V = Val.getSExtValue();
- emitSignedInt64(Vals, V);
- Code = bitc::CST_CODE_INTEGER;
- AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
- } else {
- // Wide integers, > 64 bits in size.
- // We have an arbitrary precision integer value to write whose
- // bit width is > 64. However, in canonical unsigned integer
- // format it is likely that the high bits are going to be zero.
- // So, we only write the number of active words.
- unsigned NWords = Val.getActiveWords();
-
- if (EmitSizeForWideNumbers)
- Vals.push_back(NWords);
-
- const uint64_t *RawWords = Val.getRawData();
- for (unsigned i = 0; i != NWords; ++i) {
- emitSignedInt64(Vals, RawWords[i]);
- }
- Code = bitc::CST_CODE_WIDE_INTEGER;
- }
-}
-
static void WriteConstants(unsigned FirstVal, unsigned LastVal,
const ValueEnumerator &VE,
BitstreamWriter &Stream, bool isGlobal) {
@@ -974,7 +947,23 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
} else if (isa<UndefValue>(C)) {
Code = bitc::CST_CODE_UNDEF;
} else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
- EmitAPInt(Record, Code, AbbrevToUse, IV->getValue());
+ if (IV->getBitWidth() <= 64) {
+ uint64_t V = IV->getSExtValue();
+ emitSignedInt64(Record, V);
+ Code = bitc::CST_CODE_INTEGER;
+ AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+ } else { // Wide integers, > 64 bits in size.
+ // We have an arbitrary precision integer value to write whose
+ // bit width is > 64. However, in canonical unsigned integer
+ // format it is likely that the high bits are going to be zero.
+ // So, we only write the number of active words.
+ unsigned NWords = IV->getValue().getActiveWords();
+ const uint64_t *RawWords = IV->getValue().getRawData();
+ for (unsigned i = 0; i != NWords; ++i) {
+ emitSignedInt64(Record, RawWords[i]);
+ }
+ Code = bitc::CST_CODE_WIDE_INTEGER;
+ }
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
Code = bitc::CST_CODE_FLOAT;
Type *Ty = CFP->getType();
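For integers wider than 64 bits, the writer above emits only the APInt's active words (all-zero high words are dropped), each run through emitSignedInt64's sign-rotated encoding. A standalone sketch under those assumptions; the demo helpers mirror, but are not, the functions in this file:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdint>
using namespace llvm;

// Sign-rotate a 64-bit value: a non-negative V becomes 2*V, a negative V
// becomes 2*|V|+1, so small magnitudes stay small when VBR-encoded.
static void demoEmitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) {
  if ((int64_t)V >= 0)
    Vals.push_back(V << 1);
  else
    Vals.push_back((-V << 1) | 1);
}

// Emit only the active 64-bit words of a wide APInt.
static void demoEmitWideAPInt(const APInt &Val,
                              SmallVectorImpl<uint64_t> &Vals) {
  unsigned NWords = Val.getActiveWords();
  const uint64_t *Raw = Val.getRawData();
  for (unsigned i = 0; i != NWords; ++i)
    demoEmitSignedInt64(Vals, Raw[i]);
}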
@@ -1182,13 +1171,6 @@ static void pushValue(const Value *V, unsigned InstID,
Vals.push_back(InstID - ValID);
}
-static void pushValue64(const Value *V, unsigned InstID,
- SmallVectorImpl<uint64_t> &Vals,
- ValueEnumerator &VE) {
- uint64_t ValID = VE.getValueID(V);
- Vals.push_back(InstID - ValID);
-}
-
static void pushValueSigned(const Value *V, unsigned InstID,
SmallVectorImpl<uint64_t> &Vals,
ValueEnumerator &VE) {
@@ -1312,63 +1294,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
break;
case Instruction::Switch:
{
- // Redefine Vals, since here we need to use 64 bit values
- // explicitly to store large APInt numbers.
- SmallVector<uint64_t, 128> Vals64;
-
Code = bitc::FUNC_CODE_INST_SWITCH;
const SwitchInst &SI = cast<SwitchInst>(I);
-
- uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16);
- Vals64.push_back(SwitchRecordHeader);
-
- Vals64.push_back(VE.getTypeID(SI.getCondition()->getType()));
- pushValue64(SI.getCondition(), InstID, Vals64, VE);
- Vals64.push_back(VE.getValueID(SI.getDefaultDest()));
- Vals64.push_back(SI.getNumCases());
+ Vals.push_back(VE.getTypeID(SI.getCondition()->getType()));
+ pushValue(SI.getCondition(), InstID, Vals, VE);
+ Vals.push_back(VE.getValueID(SI.getDefaultDest()));
for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
i != e; ++i) {
- const IntegersSubset& CaseRanges = i.getCaseValueEx();
- unsigned Code, Abbrev; // will unused.
-
- if (CaseRanges.isSingleNumber()) {
- Vals64.push_back(1/*NumItems = 1*/);
- Vals64.push_back(true/*IsSingleNumber = true*/);
- EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true);
- } else {
-
- Vals64.push_back(CaseRanges.getNumItems());
-
- if (CaseRanges.isSingleNumbersOnly()) {
- for (unsigned ri = 0, rn = CaseRanges.getNumItems();
- ri != rn; ++ri) {
-
- Vals64.push_back(true/*IsSingleNumber = true*/);
-
- EmitAPInt(Vals64, Code, Abbrev,
- CaseRanges.getSingleNumber(ri), true);
- }
- } else
- for (unsigned ri = 0, rn = CaseRanges.getNumItems();
- ri != rn; ++ri) {
- IntegersSubset::Range r = CaseRanges.getItem(ri);
- bool IsSingleNumber = CaseRanges.isSingleNumber(ri);
-
- Vals64.push_back(IsSingleNumber);
-
- EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true);
- if (!IsSingleNumber)
- EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true);
- }
- }
- Vals64.push_back(VE.getValueID(i.getCaseSuccessor()));
+ Vals.push_back(VE.getValueID(i.getCaseValue()));
+ Vals.push_back(VE.getValueID(i.getCaseSuccessor()));
}
-
- Stream.EmitRecord(Code, Vals64, AbbrevToUse);
-
- // Also do expected action - clear external Vals collection:
- Vals.clear();
- return;
}
break;
case Instruction::IndirectBr:
@@ -1930,6 +1865,8 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
WriteUseList(FI, VE, Stream);
if (!FI->isDeclaration())
WriteFunctionUseList(FI, VE, Stream);
+ if (FI->hasPrefixData())
+ WriteUseList(FI->getPrefixData(), VE, Stream);
}
// Write the aliases.
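For reference, the simplified FUNC_CODE_INST_SWITCH record above goes back to one (case value, successor) pair per case, matching a plain switch such as this illustrative 3.4-era IR:

  switch i32 %x, label %otherwise [
    i32 0, label %case0
    i32 1, label %case1
  ]

Each case contributes the value ID of its ConstantInt plus the ID of its successor block; the removed IntegersSubset/case-range encoding is no longer emitted.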
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 8bac6da..a164104 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -60,6 +60,11 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
I != E; ++I)
EnumerateValue(I->getAliasee());
+ // Enumerate the prefix data constants.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+ if (I->hasPrefixData())
+ EnumerateValue(I->getPrefixData());
+
// Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
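The prefix data handled above (enumerated here, written out by the bitcode writer and AsmPrinter) is the constant attached to a function definition, e.g. in 3.4-era IR (the constant is chosen arbitrarily for illustration):

  define void @f() prefix i32 123 {
    ret void
  }

Enumerating it up front ensures the constant already has a value ID when the MODULE_CODE_FUNCTION record and the use lists refer to it.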
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 4d9aebc..7fbf123 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -7,6 +7,7 @@ add_subdirectory(Bitcode)
add_subdirectory(Transforms)
add_subdirectory(Linker)
add_subdirectory(Analysis)
+add_subdirectory(LTO)
add_subdirectory(MC)
add_subdirectory(Object)
add_subdirectory(Option)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index e079707..2ee7767 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -782,7 +782,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
if (MI == CriticalPathMI) {
CriticalPathSU = CriticalPathStep(CriticalPathSU);
CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
- } else {
+ } else if (CriticalPathSet.any()) {
ExcludeRegs = &CriticalPathSet;
}
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index ca08b5b..1600c67 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -320,6 +320,7 @@ static const Value *getNoopInput(const Value *V,
static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
SmallVectorImpl<unsigned> &RetIndices,
SmallVectorImpl<unsigned> &CallIndices,
+ bool AllowDifferingSizes,
const TargetLoweringBase &TLI) {
// Trace the sub-value needed by the return value as far back up the graph as
@@ -350,7 +351,8 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
// all the bits that are needed by the "ret" have been provided by the "tail
// call". FIXME: with sufficiently cunning bit-tracking, we could look through
// extensions too.
- if (BitsProvided < BitsRequired)
+ if (BitsProvided < BitsRequired ||
+ (!AllowDifferingSizes && BitsProvided != BitsRequired))
return false;
return true;
@@ -382,9 +384,8 @@ static bool indexReallyValid(CompositeType *T, unsigned Idx) {
/// function again on a finished iterator will repeatedly return
/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
/// aggregate or a non-aggregate
-static bool
-advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
- SmallVectorImpl<unsigned> &Path) {
+static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
+ SmallVectorImpl<unsigned> &Path) {
// First march back up the tree until we can successfully increment one of the
// coordinates in Path.
while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
@@ -454,8 +455,8 @@ static bool firstRealType(Type *Next,
/// Set the iterator data-structures to the next non-empty, non-aggregate
/// subtype.
-bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
- SmallVectorImpl<unsigned> &Path) {
+static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
+ SmallVectorImpl<unsigned> &Path) {
do {
if (!advanceToNextLeafType(SubTypes, Path))
return false;
@@ -509,6 +510,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
return false;
}
+ return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI);
+}
+
+bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
+ const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI) {
// If the block ends with a void return or unreachable, it doesn't matter
// what the call's return type is.
if (!Ret || Ret->getNumOperands() == 0) return true;
@@ -517,19 +525,38 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
// return type is.
if (isa<UndefValue>(Ret->getOperand(0))) return true;
- // Conservatively require the attributes of the call to match those of
- // the return. Ignore noalias because it doesn't affect the call sequence.
- const Function *F = ExitBB->getParent();
- AttributeSet CallerAttrs = F->getAttributes();
- if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias))
- return false;
+ // Make sure the attributes attached to each return are compatible.
+ AttrBuilder CallerAttrs(F->getAttributes(),
+ AttributeSet::ReturnIndex);
+ AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
+ AttributeSet::ReturnIndex);
+
+ // Noalias is completely benign as far as calling convention goes; it
+ // shouldn't affect whether the call is a tail call.
+ CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias);
+ CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias);
+
+ bool AllowDifferingSizes = true;
+ if (CallerAttrs.contains(Attribute::ZExt)) {
+ if (!CalleeAttrs.contains(Attribute::ZExt))
+ return false;
+
+ AllowDifferingSizes = false;
+ CallerAttrs.removeAttribute(Attribute::ZExt);
+ CalleeAttrs.removeAttribute(Attribute::ZExt);
+ } else if (CallerAttrs.contains(Attribute::SExt)) {
+ if (!CalleeAttrs.contains(Attribute::SExt))
+ return false;
+
+ AllowDifferingSizes = false;
+ CallerAttrs.removeAttribute(Attribute::SExt);
+ CalleeAttrs.removeAttribute(Attribute::SExt);
+ }
- // It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ // If they're still different, there's some facet we don't understand
+ // (currently only "inreg", but in future who knows). It may be OK but the
+ // only safe option is to reject the tail call.
+ if (CallerAttrs != CalleeAttrs)
return false;
const Value *RetVal = Ret->getOperand(0), *CallVal = I;
@@ -571,7 +598,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
// Finally, we can check whether the value produced by the tail call at this
// index is compatible with the value we return.
- if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, TLI))
+ if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
+ AllowDifferingSizes, TLI))
return false;
CallEmpty = !nextRealType(CallSubTypes, CallPath);
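A minimal sketch of what the attribute check added above permits (illustrative IR, not from the patch): a zeroext or signext return stays tail-call eligible only if the callee's return carries the same extension attribute and, with AllowDifferingSizes disabled, the bit widths match exactly:

  declare zeroext i8 @callee()

  define zeroext i8 @caller() {
  entry:
    %v = tail call zeroext i8 @callee()
    ret i8 %v
  }

Dropping zeroext from either side, or letting the provided and required bit counts differ, makes returnTypeIsEligibleForTailCall return false.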
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 188047d..5d82dd9 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -47,13 +47,18 @@ ARMException::ARMException(AsmPrinter *A)
ARMException::~ARMException() {}
+ARMTargetStreamer &ARMException::getTargetStreamer() {
+ MCTargetStreamer &TS = Asm->OutStreamer.getTargetStreamer();
+ return static_cast<ARMTargetStreamer &>(TS);
+}
+
void ARMException::EndModule() {
}
/// BeginFunction - Gather pre-function exception information. Assumes it's
/// being emitted immediately after the function entry point.
void ARMException::BeginFunction(const MachineFunction *MF) {
- Asm->OutStreamer.EmitFnStart();
+ getTargetStreamer().emitFnStart();
if (Asm->MF->getFunction()->needsUnwindTableEntry())
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
Asm->getFunctionNumber()));
@@ -62,8 +67,9 @@ void ARMException::BeginFunction(const MachineFunction *MF) {
/// EndFunction - Gather and emit post-function exception information.
///
void ARMException::EndFunction() {
+ ARMTargetStreamer &ATS = getTargetStreamer();
if (!Asm->MF->getFunction()->needsUnwindTableEntry())
- Asm->OutStreamer.EmitCantUnwind();
+ ATS.emitCantUnwind();
else {
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
@@ -76,13 +82,13 @@ void ARMException::EndFunction() {
// Emit references to personality.
if (const Function * Personality =
MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
- MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ MCSymbol *PerSym = Asm->getSymbol(Personality);
Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
- Asm->OutStreamer.EmitPersonality(PerSym);
+ ATS.emitPersonality(PerSym);
}
// Emit .handlerdata directive.
- Asm->OutStreamer.EmitHandlerData();
+ ATS.emitHandlerData();
// Emit actual exception table
EmitExceptionTable();
@@ -90,7 +96,7 @@ void ARMException::EndFunction() {
}
}
- Asm->OutStreamer.EmitFnEnd();
+ ATS.emitFnEnd();
}
void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 12c3574..308b0e0 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -48,6 +48,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
static const char *const DWARFGroupName = "DWARF Emission";
@@ -94,7 +95,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
: MachineFunctionPass(ID),
- TM(tm), MAI(tm.getMCAsmInfo()),
+ TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()),
OutContext(Streamer.getContext()),
OutStreamer(Streamer),
LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
@@ -164,7 +165,7 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer.InitStreamer();
- Mang = new Mangler(OutContext, &TM);
+ Mang = new Mangler(&TM);
// Allow the target to emit any magic that it wants at the start of the file.
EmitStartOfAsmFile(M);
@@ -212,12 +213,12 @@ bool AsmPrinter::doInitialization(Module &M) {
llvm_unreachable("Unknown exception type.");
}
-void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
- switch ((GlobalValue::LinkageTypes)Linkage) {
+void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
+ GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+ switch (Linkage) {
case GlobalValue::CommonLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
- case GlobalValue::LinkOnceODRAutoHideLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
@@ -225,8 +226,19 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
- if ((GlobalValue::LinkageTypes)Linkage !=
- GlobalValue::LinkOnceODRAutoHideLinkage)
+ bool CanBeHidden = false;
+
+ if (Linkage == GlobalValue::LinkOnceODRLinkage) {
+ if (GV->hasUnnamedAddr()) {
+ CanBeHidden = true;
+ } else {
+ GlobalStatus GS;
+ if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared)
+ CanBeHidden = true;
+ }
+ }
+
+ if (!CanBeHidden)
// .weak_definition _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
@@ -239,7 +251,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
// .weak _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
}
- break;
+ return;
case GlobalValue::DLLExportLinkage:
case GlobalValue::AppendingLinkage:
// FIXME: appending linkage variables should go into a section of
@@ -248,16 +260,23 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
// If external or appending, declare as a global symbol.
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
- break;
+ return;
case GlobalValue::PrivateLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::LinkerPrivateLinkage:
- break;
- default:
- llvm_unreachable("Unknown linkage type!");
+ return;
+ case GlobalValue::AvailableExternallyLinkage:
+ llvm_unreachable("Should never emit this");
+ case GlobalValue::DLLImportLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ llvm_unreachable("Don't know how to emit these");
}
+ llvm_unreachable("Unknown linkage type!");
}
+MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
+ return getObjFileLowering().getSymbol(*Mang, GV);
+}
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
@@ -273,7 +292,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
}
- MCSymbol *GVSym = Mang->getSymbol(GV);
+ MCSymbol *GVSym = getSymbol(GV);
EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
if (!GV->hasInitializer()) // External globals require no extra code.
@@ -284,13 +303,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const DataLayout *TD = TM.getDataLayout();
- uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ const DataLayout *DL = TM.getDataLayout();
+ uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
// sections and expected to be contiguous (e.g. ObjC metadata).
- unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+ unsigned AlignLog = getGVAlignmentLog2(GV, *DL);
+
+ if (DD)
+ DD->setSymbolSize(GVSym, Size);
// Handle common and BSS local symbols (.lcomm).
if (GVKind.isCommon() || GVKind.isBSSLocal()) {
@@ -388,14 +410,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.SwitchSection(TLVSect);
// Emit the linkage here.
- EmitLinkage(GV->getLinkage(), GVSym);
+ EmitLinkage(GV, GVSym);
OutStreamer.EmitLabel(GVSym);
// Three pointers in size:
// - __tlv_bootstrap - used to make sure support exists
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
- unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ unsigned PtrSize = DL->getPointerTypeSize(GV->getType());
OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize);
OutStreamer.EmitIntValue(0, PtrSize);
@@ -407,7 +429,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.SwitchSection(TheSection);
- EmitLinkage(GV->getLinkage(), GVSym);
+ EmitLinkage(GV, GVSym);
EmitAlignment(AlignLog, GV);
OutStreamer.EmitLabel(GVSym);
@@ -433,7 +455,7 @@ void AsmPrinter::EmitFunctionHeader() {
OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
EmitVisibility(CurrentFnSym, F->getVisibility());
- EmitLinkage(F->getLinkage(), CurrentFnSym);
+ EmitLinkage(F, CurrentFnSym);
EmitAlignment(MF->getAlignment(), F);
if (MAI->hasDotTypeDotSizeDirective())
@@ -459,16 +481,6 @@ void AsmPrinter::EmitFunctionHeader() {
OutStreamer.EmitLabel(DeadBlockSyms[i]);
}
- // Add some workaround for linkonce linkage on Cygwin\MinGW.
- if (MAI->getLinkOnceDirective() != 0 &&
- (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
- // FIXME: What is this?
- MCSymbol *FakeStub =
- OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
- CurrentFnSym->getName());
- OutStreamer.EmitLabel(FakeStub);
- }
-
// Emit pre-function debug and/or EH information.
if (DE) {
NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
@@ -478,6 +490,10 @@ void AsmPrinter::EmitFunctionHeader() {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
DD->beginFunction(MF);
}
+
+ // Emit the prefix data.
+ if (F->hasPrefixData())
+ EmitGlobalConstant(F->getPrefixData());
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -530,11 +546,11 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// emitImplicitDef - This method emits the specified machine instruction
/// that is an implicit def.
-static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- AP.OutStreamer.AddComment(Twine("implicit-def: ") +
- AP.TM.getRegisterInfo()->getName(RegNo));
- AP.OutStreamer.AddBlankLine();
+ OutStreamer.AddComment(Twine("implicit-def: ") +
+ TM.getRegisterInfo()->getName(RegNo));
+ OutStreamer.AddBlankLine();
}
static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
@@ -646,7 +662,7 @@ bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
}
void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
- MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+ const MCSymbol *Label = MI.getOperand(0).getMCSymbol();
if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
return;
@@ -657,12 +673,12 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
if (MMI->getCompactUnwindEncoding() != 0)
OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
- MachineModuleInfo &MMI = MF->getMMI();
- std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions();
+ const MachineModuleInfo &MMI = MF->getMMI();
+ const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
bool FoundOne = false;
(void)FoundOne;
- for (std::vector<MCCFIInstruction>::iterator I = Instructions.begin(),
- E = Instructions.end(); I != E; ++I) {
+ for (std::vector<MCCFIInstruction>::const_iterator I = Instrs.begin(),
+ E = Instrs.end(); I != E; ++I) {
if (I->getLabel() == Label) {
emitCFIInstruction(*I);
FoundOne = true;
@@ -724,7 +740,7 @@ void AsmPrinter::EmitFunctionBody() {
}
break;
case TargetOpcode::IMPLICIT_DEF:
- if (isVerbose()) emitImplicitDef(II, *this);
+ if (isVerbose()) emitImplicitDef(II);
break;
case TargetOpcode::KILL:
if (isVerbose()) emitKill(II, *this);
@@ -877,7 +893,7 @@ bool AsmPrinter::doFinalization(Module &M) {
if (V == GlobalValue::DefaultVisibility)
continue;
- MCSymbol *Name = Mang->getSymbol(&F);
+ MCSymbol *Name = getSymbol(&F);
EmitVisibility(Name, V, false);
}
@@ -887,6 +903,9 @@ bool AsmPrinter::doFinalization(Module &M) {
if (!ModuleFlags.empty())
getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
+ // Make sure we wrote out everything we need.
+ OutStreamer.Flush();
+
// Finalize debug and EH information.
if (DE) {
{
@@ -914,12 +933,12 @@ bool AsmPrinter::doFinalization(Module &M) {
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
if (!I->hasExternalWeakLinkage()) continue;
- OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference);
}
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->hasExternalWeakLinkage()) continue;
- OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+ OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference);
}
}
@@ -927,14 +946,19 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer.AddBlankLine();
for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
- MCSymbol *Name = Mang->getSymbol(I);
+ MCSymbol *Name = getSymbol(I);
const GlobalValue *GV = I->getAliasedGlobal();
- MCSymbol *Target = Mang->getSymbol(GV);
+ if (GV->isDeclaration()) {
+ report_fatal_error(Name->getName() +
+ ": Target doesn't support aliases to declarations");
+ }
+
+ MCSymbol *Target = getSymbol(GV);
if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
- else if (I->hasWeakLinkage())
+ else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
else
assert(I->hasLocalLinkage() && "Invalid alias linkage");
@@ -953,6 +977,9 @@ bool AsmPrinter::doFinalization(Module &M) {
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
MP->finishAssembly(*this);
+ // Emit llvm.ident metadata in an '.ident' directive.
+ EmitModuleIdents(M);
+
// If we don't have any trampolines, then we don't require stack memory
// to be executable. Some targets have a directive to declare this.
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
@@ -976,7 +1003,7 @@ bool AsmPrinter::doFinalization(Module &M) {
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
- CurrentFnSym = Mang->getSymbol(MF.getFunction());
+ CurrentFnSym = getSymbol(MF.getFunction());
CurrentFnSymForSize = CurrentFnSym;
if (isVerbose())
@@ -1283,16 +1310,10 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
const GlobalValue *GV =
dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
- OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+ OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
}
}
-typedef std::pair<unsigned, Constant*> Structor;
-
-static bool priority_order(const Structor& lhs, const Structor& rhs) {
- return lhs.first < rhs.first;
-}
-
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
@@ -1309,6 +1330,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
!isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
// Gather the structors in a form that's convenient for sorting by priority.
+ typedef std::pair<unsigned, Constant *> Structor;
SmallVector<Structor, 8> Structors;
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
@@ -1322,9 +1344,9 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const DataLayout *TD = TM.getDataLayout();
- unsigned Align = Log2_32(TD->getPointerPrefAlignment());
- std::stable_sort(Structors.begin(), Structors.end(), priority_order);
+ const DataLayout *DL = TM.getDataLayout();
+ unsigned Align = Log2_32(DL->getPointerPrefAlignment());
+ std::stable_sort(Structors.begin(), Structors.end(), less_first());
for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
const MCSection *OutputSection =
(isCtor ?
@@ -1337,6 +1359,21 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
}
+void AsmPrinter::EmitModuleIdents(Module &M) {
+ if (!MAI->hasIdentDirective())
+ return;
+
+ if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ assert(N->getNumOperands() == 1 &&
+ "llvm.ident metadata entry can have only one operand");
+ const MDString *S = cast<MDString>(N->getOperand(0));
+ OutStreamer.EmitIdent(S->getString());
+ }
+ }
+}
+
//===--------------------------------------------------------------------===//
// Emission and print routines
//
@@ -1402,12 +1439,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
OutContext);
if (!MAI->hasSetDirective())
- OutStreamer.EmitValue(Diff, 4);
+ OutStreamer.EmitValue(Diff, Size);
else {
// Otherwise, emit with .set (aka assignment).
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
OutStreamer.EmitAssignment(SetLabel, Diff);
- OutStreamer.EmitSymbolValue(SetLabel, 4);
+ OutStreamer.EmitSymbolValue(SetLabel, Size);
}
}
@@ -1415,9 +1452,9 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
/// where the size in bytes of the directive is specified by Size and Label
/// specifies the label. This implicitly uses .set if it is available.
void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
- unsigned Size)
+ unsigned Size, bool IsSectionRelative)
const {
- if (MAI->needsDwarfSectionOffsetDirective() && Size == 4) { // secrel32 ONLY works for 32bits.
+ if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
@@ -1468,7 +1505,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+ return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
@@ -1498,10 +1535,10 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const DataLayout &TD = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
- APInt OffsetAI(TD.getPointerSizeInBits(), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
+ APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
if (!OffsetAI)
@@ -1522,17 +1559,17 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
return lowerConstant(CE->getOperand(0), AP);
case Instruction::IntToPtr: {
- const DataLayout &TD = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+ Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
false/*ZExt*/);
return lowerConstant(Op, AP);
}
case Instruction::PtrToInt: {
- const DataLayout &TD = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
@@ -1542,13 +1579,13 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
// We can emit the pointer value into this slot if the slot is an
// integer slot equal to the size of the pointer.
- if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+ if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
return OpExpr;
// Otherwise the pointer is smaller than the resultant integer, mask off
// the high bits so we are sure to get a proper truncation if the input is
// a constant expr.
- unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+ unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
}
@@ -1699,9 +1736,9 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
}
- const DataLayout &TD = *AP.TM.getDataLayout();
- unsigned Size = TD.getTypeAllocSize(CDS->getType());
- unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
+ const DataLayout &DL = *AP.TM.getDataLayout();
+ unsigned Size = DL.getTypeAllocSize(CDS->getType());
+ unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer.EmitZeros(Padding);
@@ -1727,9 +1764,9 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
emitGlobalConstantImpl(CV->getOperand(i), AP);
- const DataLayout &TD = *AP.TM.getDataLayout();
- unsigned Size = TD.getTypeAllocSize(CV->getType());
- unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ const DataLayout &DL = *AP.TM.getDataLayout();
+ unsigned Size = DL.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer.EmitZeros(Padding);
@@ -1737,15 +1774,15 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
// Print the fields in successive locations. Pad to align if needed!
- const DataLayout *TD = AP.TM.getDataLayout();
- unsigned Size = TD->getTypeAllocSize(CS->getType());
- const StructLayout *Layout = TD->getStructLayout(CS->getType());
+ const DataLayout *DL = AP.TM.getDataLayout();
+ unsigned Size = DL->getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = DL->getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
const Constant *Field = CS->getOperand(i);
// Check if padding is needed and insert one or more 0s.
- uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+ uint64_t FieldSize = DL->getTypeAllocSize(Field->getType());
uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
- Layout->getElementOffset(i)) - FieldSize;
SizeSoFar += FieldSize + PadSize;
@@ -1802,13 +1839,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
}
// Emit the tail padding for the long double.
- const DataLayout &TD = *AP.TM.getDataLayout();
- AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
- TD.getTypeStoreSize(CFP->getType()));
+ const DataLayout &DL = *AP.TM.getDataLayout();
+ AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
+ DL.getTypeStoreSize(CFP->getType()));
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
- const DataLayout *TD = AP.TM.getDataLayout();
+ const DataLayout *DL = AP.TM.getDataLayout();
unsigned BitWidth = CI->getBitWidth();
// Copy the value as we may massage the layout for constants whose bit width
@@ -1825,7 +1862,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// Big endian:
// * Record the extra bits to emit.
// * Realign the raw data to emit the chunks of 64-bits.
- if (TD->isBigEndian()) {
+ if (DL->isBigEndian()) {
// Basically the structure of the raw data is a chunk of 64-bits cells:
// 0 1 BitWidth / 64
// [chunk1][chunk2] ... [chunkN].
@@ -1846,7 +1883,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// quantities at a time.
const uint64_t *RawData = Realigned.getRawData();
for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
- uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i];
AP.OutStreamer.EmitIntValue(Val, 8);
}
@@ -1864,8 +1901,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
}
static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
- const DataLayout *TD = AP.TM.getDataLayout();
- uint64_t Size = TD->getTypeAllocSize(CV->getType());
+ const DataLayout *DL = AP.TM.getDataLayout();
+ uint64_t Size = DL->getTypeAllocSize(CV->getType());
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size);
@@ -1913,7 +1950,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
- Constant *New = ConstantFoldConstantExpression(CE, TD);
+ Constant *New = ConstantFoldConstantExpression(CE, DL);
if (New && New != CE)
return emitGlobalConstantImpl(New, AP);
}
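The EmitModuleIdents hook added above walks the llvm.ident named metadata and emits one .ident directive per entry; in 3.4-era textual IR that metadata has the following shape (the string is an illustrative example):

  !llvm.ident = !{!0}
  !0 = metadata !{metadata !"clang version 3.4"}

Each operand must be a node with a single MDString, which is what the assert in EmitModuleIdents enforces.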
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index c141d60..b92f49c 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -185,5 +185,11 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpOffset:
OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset());
break;
+ case MCCFIInstruction::OpRegister:
+ OutStreamer.EmitCFIRegister(Inst.getRegister(), Inst.getRegister2());
+ break;
+ case MCCFIInstruction::OpWindowSave:
+ OutStreamer.EmitCFIWindowSave();
+ break;
}
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index d8e9c95..4f927f6 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -123,7 +123,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
TM.getTargetCPU(),
TM.getTargetFeatureString()));
OwningPtr<MCTargetAsmParser>
- TAP(TM.getTarget().createMCAsmParser(*STI, *Parser));
+ TAP(TM.getTarget().createMCAsmParser(*STI, *Parser, *MII));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 65e7bee..be484a6 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMAsmPrinter
AsmPrinterDwarf.cpp
AsmPrinterInlineAsm.cpp
DIE.cpp
+ DIEHash.cpp
DwarfAccelTable.cpp
DwarfCFIException.cpp
DwarfCompileUnit.cpp
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index ab03861..6944428 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -34,8 +34,10 @@ using namespace llvm;
/// Profile - Used to gather unique data for the abbreviation folding set.
///
void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(Attribute);
- ID.AddInteger(Form);
+ // Explicitly cast to an integer type for which FoldingSetNodeID has
+ // overloads. Otherwise MSVC 2010 thinks this call is ambiguous.
+ ID.AddInteger(unsigned(Attribute));
+ ID.AddInteger(unsigned(Form));
}
//===----------------------------------------------------------------------===//
@@ -45,7 +47,7 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
/// Profile - Used to gather unique data for the abbreviation folding set.
///
void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(Tag);
+ ID.AddInteger(unsigned(Tag));
ID.AddInteger(ChildrenFlag);
// For each attribute description.
@@ -112,17 +114,25 @@ DIE::~DIE() {
/// Climb up the parent chain to get the compile unit DIE to which this DIE
/// belongs.
-DIE *DIE::getCompileUnit() {
- DIE *p = this;
+const DIE *DIE::getCompileUnit() const {
+ const DIE *Cu = getCompileUnitOrNull();
+ assert(Cu && "We should not have orphaned DIEs.");
+ return Cu;
+}
+
+/// Climb up the parent chain to get the compile unit DIE this DIE belongs
+/// to. Return NULL if DIE is not added to an owner yet.
+const DIE *DIE::getCompileUnitOrNull() const {
+ const DIE *p = this;
while (p) {
if (p->getTag() == dwarf::DW_TAG_compile_unit)
return p;
p = p->getParent();
}
- llvm_unreachable("We should not have orphaned DIEs.");
+ return NULL;
}
-DIEValue *DIE::findAttribute(unsigned Attribute) {
+DIEValue *DIE::findAttribute(uint16_t Attribute) {
const SmallVectorImpl<DIEValue *> &Values = getValues();
const DIEAbbrev &Abbrevs = getAbbrev();
@@ -199,14 +209,14 @@ void DIEValue::dump() const {
/// EmitValue - Emit integer of appropriate size.
///
-void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
unsigned Size = ~0U;
switch (Form) {
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
if (Asm->OutStreamer.hasRawTextSupport())
- Asm->OutStreamer.EmitRawText(StringRef(""));
+ Asm->OutStreamer.EmitRawText("");
return;
case dwarf::DW_FORM_flag: // Fall thru
case dwarf::DW_FORM_ref1: // Fall thru
@@ -231,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
/// SizeOf - Determine size of integer value in bytes.
///
-unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_flag_present: return 0;
case dwarf::DW_FORM_flag: // Fall thru
@@ -266,13 +276,13 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
-void DIEExpr::EmitValue(AsmPrinter *AP, unsigned Form) const {
+void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
///
-unsigned DIEExpr::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -292,13 +302,16 @@ void DIEExpr::print(raw_ostream &O) const {
/// EmitValue - Emit label value.
///
-void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
- AP->EmitLabelReference(Label, SizeOf(AP, Form));
+void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+ AP->EmitLabelReference(Label, SizeOf(AP, Form),
+ Form == dwarf::DW_FORM_strp ||
+ Form == dwarf::DW_FORM_sec_offset ||
+ Form == dwarf::DW_FORM_ref_addr);
}
/// SizeOf - Determine size of label value in bytes.
///
-unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -317,13 +330,13 @@ void DIELabel::print(raw_ostream &O) const {
/// EmitValue - Emit delta value.
///
-void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
+void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
}
/// SizeOf - Determine size of delta value in bytes.
///
-unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getDataLayout().getPointerSize();
@@ -341,13 +354,13 @@ void DIEDelta::print(raw_ostream &O) const {
/// EmitValue - Emit string value.
///
-void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const {
+void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
Access->EmitValue(AP, Form);
}
/// SizeOf - Determine size of delta value in bytes.
///
-unsigned DIEString::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
return Access->SizeOf(AP, Form);
}
@@ -364,7 +377,7 @@ void DIEString::print(raw_ostream &O) const {
/// EmitValue - Emit debug information entry offset.
///
-void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitInt32(Entry->getOffset());
}
@@ -402,7 +415,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
/// EmitValue - Emit block data.
///
-void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -418,7 +431,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
/// SizeOf - Determine size of block data in bytes.
///
-unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index bfd7d1d..f4fa326 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -33,17 +33,17 @@ namespace llvm {
class DIEAbbrevData {
/// Attribute - Dwarf attribute code.
///
- uint16_t Attribute;
+ dwarf::Attribute Attribute;
/// Form - Dwarf form code.
///
- uint16_t Form;
+ dwarf::Form Form;
public:
- DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {}
+ DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {}
// Accessors.
- uint16_t getAttribute() const { return Attribute; }
- uint16_t getForm() const { return Form; }
+ dwarf::Attribute getAttribute() const { return Attribute; }
+ dwarf::Form getForm() const { return Form; }
/// Profile - Used to gather unique data for the abbreviation folding set.
///
@@ -56,7 +56,7 @@ namespace llvm {
class DIEAbbrev : public FoldingSetNode {
/// Tag - Dwarf tag code.
///
- uint16_t Tag;
+ dwarf::Tag Tag;
/// ChildrenFlag - Dwarf children flag.
///
@@ -71,20 +71,19 @@ namespace llvm {
SmallVector<DIEAbbrevData, 12> Data;
public:
- DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
+ DIEAbbrev(dwarf::Tag T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
// Accessors.
- uint16_t getTag() const { return Tag; }
+ dwarf::Tag getTag() const { return Tag; }
unsigned getNumber() const { return Number; }
uint16_t getChildrenFlag() const { return ChildrenFlag; }
const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
- void setTag(uint16_t T) { Tag = T; }
void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
void setNumber(unsigned N) { Number = N; }
/// AddAttribute - Adds another set of attribute information to the
/// abbreviation.
- void AddAttribute(uint16_t Attribute, uint16_t Form) {
+ void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) {
Data.push_back(DIEAbbrevData(Attribute, Form));
}
@@ -131,19 +130,17 @@ namespace llvm {
///
SmallVector<DIEValue*, 12> Values;
-#ifndef NDEBUG
- // Private data for print()
- mutable unsigned IndentCount;
-#endif
public:
explicit DIE(unsigned Tag)
- : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {}
+ : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
+ Parent(0) {}
virtual ~DIE();
// Accessors.
DIEAbbrev &getAbbrev() { return Abbrev; }
+ const DIEAbbrev &getAbbrev() const { return Abbrev; }
unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
- unsigned getTag() const { return Abbrev.getTag(); }
+ dwarf::Tag getTag() const { return Abbrev.getTag(); }
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
const std::vector<DIE *> &getChildren() const { return Children; }
@@ -151,14 +148,17 @@ namespace llvm {
DIE *getParent() const { return Parent; }
/// Climb up the parent chain to get the compile unit DIE this DIE belongs
/// to.
- DIE *getCompileUnit();
- void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+ const DIE *getCompileUnit() const;
+ /// Similar to getCompileUnit, returns null when DIE is not added to an
+ /// owner yet.
+ const DIE *getCompileUnitOrNull() const;
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
/// addValue - Add a value and attributes to a DIE.
///
- void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+ void addValue(dwarf::Attribute Attribute, dwarf::Form Form,
+ DIEValue *Value) {
Abbrev.AddAttribute(Attribute, Form);
Values.push_back(Value);
}
@@ -166,10 +166,7 @@ namespace llvm {
/// addChild - Add a child to the DIE.
///
void addChild(DIE *Child) {
- if (Child->getParent()) {
- assert (Child->getParent() == this && "Unexpected DIE Parent!");
- return;
- }
+ assert(!Child->getParent());
Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
Children.push_back(Child);
Child->Parent = this;
@@ -177,7 +174,7 @@ namespace llvm {
/// findAttribute - Find a value in the DIE with the attribute given, returns NULL
/// if no such attribute exists.
- DIEValue *findAttribute(unsigned Attribute);
+ DIEValue *findAttribute(uint16_t Attribute);
#ifndef NDEBUG
void print(raw_ostream &O, unsigned IndentCount = 0) const;
@@ -213,11 +210,11 @@ namespace llvm {
/// EmitValue - Emit value via the Dwarf writer.
///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0;
+ virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0;
/// SizeOf - Return the size of a value in bytes.
///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
+ virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0;
#ifndef NDEBUG
virtual void print(raw_ostream &O) const = 0;
@@ -235,7 +232,7 @@ namespace llvm {
/// BestForm - Choose the best form for integer.
///
- static unsigned BestForm(bool IsSigned, uint64_t Int) {
+ static dwarf::Form BestForm(bool IsSigned, uint64_t Int) {
if (IsSigned) {
const int64_t SignedInt = Int;
if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
@@ -251,13 +248,13 @@ namespace llvm {
/// EmitValue - Emit integer of appropriate size.
///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
uint64_t getValue() const { return Integer; }
/// SizeOf - Determine size of integer value in bytes.
///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+ virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
// Implement isa/cast/dyncast.
static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
@@ -277,7 +274,7 @@ namespace llvm {
/// EmitValue - Emit expression value.
///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
/// getValue - Get MCExpr.
///
@@ -285,7 +282,7 @@ namespace llvm {
/// SizeOf - Determine size of expression value in bytes.
///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+ virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
// Implement isa/cast/dyncast.
static bool classof(const DIEValue *E) { return E->getType() == isExpr; }
@@ -305,7 +302,7 @@ namespace llvm {
/// EmitValue - Emit label value.
///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
/// getValue - Get MCSymbol.
///
@@ -313,7 +310,7 @@ namespace llvm {
/// SizeOf - Determine size of label value in bytes.
///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+ virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
// Implement isa/cast/dyncast.
static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
@@ -335,11 +332,11 @@ namespace llvm {
/// EmitValue - Emit delta value.
///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
/// SizeOf - Determine size of delta value in bytes.
///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+ virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
// Implement isa/cast/dyncast.
static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
@@ -365,11 +362,11 @@ namespace llvm {
/// EmitValue - Emit delta value.
///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
/// SizeOf - Determine size of delta value in bytes.
///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+ virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
// Implement isa/cast/dyncast.
static bool classof(const DIEValue *D) { return D->getType() == isString; }
@@ -394,13 +391,13 @@ namespace llvm {
/// EmitValue - Emit debug information entry offset.
///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
/// SizeOf - Determine size of debug information entry in bytes.
///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
- return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) :
- sizeof(int32_t);
+ virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+ return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP)
+ : sizeof(int32_t);
}
/// Returns size of a ref_addr entry.
@@ -420,9 +417,7 @@ namespace llvm {
class DIEBlock : public DIEValue, public DIE {
unsigned Size; // Size in bytes excluding size header.
public:
- DIEBlock()
- : DIEValue(isBlock), DIE(0), Size(0) {}
- virtual ~DIEBlock() {}
+ DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {}
/// ComputeSize - calculate the size of the block.
///
@@ -430,7 +425,7 @@ namespace llvm {
/// BestForm - Choose the best form for data.
///
- unsigned BestForm() const {
+ dwarf::Form BestForm() const {
if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
@@ -439,11 +434,11 @@ namespace llvm {
/// EmitValue - Emit block data.
///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
/// SizeOf - Determine size of block data in bytes.
///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+ virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
// Implement isa/cast/dyncast.
static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
new file mode 100644
index 0000000..95eca90
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -0,0 +1,507 @@
+//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for DWARF4 hashing of DIEs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+
+#include "DIEHash.h"
+
+#include "DIE.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// \brief Grabs the string in whichever attribute is passed in and returns
+/// a reference to it.
+static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
+ const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+ const DIEAbbrev &Abbrevs = Die.getAbbrev();
+
+ // Iterate through all the attributes until we find the one we're
+ // looking for; if we can't find it, return an empty string.
+ for (size_t i = 0; i < Values.size(); ++i) {
+ if (Abbrevs.getData()[i].getAttribute() == Attr) {
+ DIEValue *V = Values[i];
+ assert(isa<DIEString>(V) && "String requested. Not a string.");
+ DIEString *S = cast<DIEString>(V);
+ return S->getString();
+ }
+ }
+ return StringRef("");
+}
+
+/// \brief Adds the string in \p Str to the hash. This also hashes
+/// a trailing NULL with the string.
+void DIEHash::addString(StringRef Str) {
+ DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
+ Hash.update(Str);
+ Hash.update(makeArrayRef((uint8_t)'\0'));
+}
+
+// FIXME: The LEB128 routines are copied and only slightly modified out of
+// LEB128.h.
+
+/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128.
+void DIEHash::addULEB128(uint64_t Value) {
+ DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ if (Value != 0)
+ Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+ Hash.update(Byte);
+ } while (Value != 0);
+}
+
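+/// \brief Adds the signed in \p Value to the hash encoded as an SLEB128.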
+void DIEHash::addSLEB128(int64_t Value) {
+ DEBUG(dbgs() << "Adding SLEB128 " << Value << " to hash.\n");
+ bool More;
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
+ ((Value == -1) && ((Byte & 0x40) != 0))));
+ if (More)
+ Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+ Hash.update(Byte);
+ } while (More);
+}
+
+/// \brief Including \p Parent adds the context of Parent to the hash.
+void DIEHash::addParentContext(const DIE &Parent) {
+
+ DEBUG(dbgs() << "Adding parent context to hash...\n");
+
+ // [7.27.2] For each surrounding type or namespace beginning with the
+ // outermost such construct...
+ SmallVector<const DIE *, 1> Parents;
+ const DIE *Cur = &Parent;
+ while (Cur->getTag() != dwarf::DW_TAG_compile_unit) {
+ Parents.push_back(Cur);
+ Cur = Cur->getParent();
+ }
+
+ // Reverse iterate over our list to go from the outermost construct to the
+ // innermost.
+ for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
+ E = Parents.rend();
+ I != E; ++I) {
+ const DIE &Die = **I;
+
+ // ... Append the letter "C" to the sequence...
+ addULEB128('C');
+
+ // ... Followed by the DWARF tag of the construct...
+ addULEB128(Die.getTag());
+
+ // ... Then the name, taken from the DW_AT_name attribute.
+ StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
+ DEBUG(dbgs() << "... adding context: " << Name << "\n");
+ if (!Name.empty())
+ addString(Name);
+ }
+}
+
+// Collect all of the attributes for a particular DIE in single structure.
+void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
+ const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+ const DIEAbbrev &Abbrevs = Die.getAbbrev();
+
+#define COLLECT_ATTR(NAME) \
+ case dwarf::NAME: \
+ Attrs.NAME.Val = Values[i]; \
+ Attrs.NAME.Desc = &Abbrevs.getData()[i]; \
+ break
+
+ for (size_t i = 0, e = Values.size(); i != e; ++i) {
+ DEBUG(dbgs() << "Attribute: "
+ << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute())
+ << " added.\n");
+ switch (Abbrevs.getData()[i].getAttribute()) {
+ COLLECT_ATTR(DW_AT_name);
+ COLLECT_ATTR(DW_AT_accessibility);
+ COLLECT_ATTR(DW_AT_address_class);
+ COLLECT_ATTR(DW_AT_allocated);
+ COLLECT_ATTR(DW_AT_artificial);
+ COLLECT_ATTR(DW_AT_associated);
+ COLLECT_ATTR(DW_AT_binary_scale);
+ COLLECT_ATTR(DW_AT_bit_offset);
+ COLLECT_ATTR(DW_AT_bit_size);
+ COLLECT_ATTR(DW_AT_bit_stride);
+ COLLECT_ATTR(DW_AT_byte_size);
+ COLLECT_ATTR(DW_AT_byte_stride);
+ COLLECT_ATTR(DW_AT_const_expr);
+ COLLECT_ATTR(DW_AT_const_value);
+ COLLECT_ATTR(DW_AT_containing_type);
+ COLLECT_ATTR(DW_AT_count);
+ COLLECT_ATTR(DW_AT_data_bit_offset);
+ COLLECT_ATTR(DW_AT_data_location);
+ COLLECT_ATTR(DW_AT_data_member_location);
+ COLLECT_ATTR(DW_AT_decimal_scale);
+ COLLECT_ATTR(DW_AT_decimal_sign);
+ COLLECT_ATTR(DW_AT_default_value);
+ COLLECT_ATTR(DW_AT_digit_count);
+ COLLECT_ATTR(DW_AT_discr);
+ COLLECT_ATTR(DW_AT_discr_list);
+ COLLECT_ATTR(DW_AT_discr_value);
+ COLLECT_ATTR(DW_AT_encoding);
+ COLLECT_ATTR(DW_AT_enum_class);
+ COLLECT_ATTR(DW_AT_endianity);
+ COLLECT_ATTR(DW_AT_explicit);
+ COLLECT_ATTR(DW_AT_is_optional);
+ COLLECT_ATTR(DW_AT_location);
+ COLLECT_ATTR(DW_AT_lower_bound);
+ COLLECT_ATTR(DW_AT_mutable);
+ COLLECT_ATTR(DW_AT_ordering);
+ COLLECT_ATTR(DW_AT_picture_string);
+ COLLECT_ATTR(DW_AT_prototyped);
+ COLLECT_ATTR(DW_AT_small);
+ COLLECT_ATTR(DW_AT_segment);
+ COLLECT_ATTR(DW_AT_string_length);
+ COLLECT_ATTR(DW_AT_threads_scaled);
+ COLLECT_ATTR(DW_AT_upper_bound);
+ COLLECT_ATTR(DW_AT_use_location);
+ COLLECT_ATTR(DW_AT_use_UTF8);
+ COLLECT_ATTR(DW_AT_variable_parameter);
+ COLLECT_ATTR(DW_AT_virtuality);
+ COLLECT_ATTR(DW_AT_visibility);
+ COLLECT_ATTR(DW_AT_vtable_elem_location);
+ COLLECT_ATTR(DW_AT_type);
+ default:
+ break;
+ }
+ }
+}
+
+void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute,
+ const DIE &Entry, StringRef Name) {
+ // append the letter 'N'
+ addULEB128('N');
+
+ // the DWARF attribute code (DW_AT_type or DW_AT_friend),
+ addULEB128(Attribute);
+
+ // the context of the tag,
+ if (const DIE *Parent = Entry.getParent())
+ addParentContext(*Parent);
+
+ // the letter 'E',
+ addULEB128('E');
+
+ // and the name of the type.
+ addString(Name);
+
+ // Currently DW_TAG_friend is not used by Clang, but if that ever changes,
+ // here's the relevant spec text to implement:
+ //
+ // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram,
+ // the context is omitted and the name to be used is the ABI-specific name
+ // of the subprogram (e.g., the mangled linker name).
+}
+
+void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute,
+ unsigned DieNumber) {
+ // a) If T is in the list of [previously hashed types], use the letter
+ // 'R' as the marker
+ addULEB128('R');
+
+ addULEB128(Attribute);
+
+ // and use the unsigned LEB128 encoding of [the index of T in the
+ // list] as the attribute value;
+ addULEB128(DieNumber);
+}
+
+void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+ const DIE &Entry) {
+ assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend "
+ "tags. Add support here when there's "
+ "a use case");
+ // Step 5
+ // If the tag in Step 3 is one of [the below tags]
+ if ((Tag == dwarf::DW_TAG_pointer_type ||
+ Tag == dwarf::DW_TAG_reference_type ||
+ Tag == dwarf::DW_TAG_rvalue_reference_type ||
+ Tag == dwarf::DW_TAG_ptr_to_member_type) &&
+ // and the referenced type (via the [below attributes])
+ // FIXME: This seems overly restrictive, and causes hash mismatches when
+ // there's a decl/def difference in the containing type of a
+ // ptr_to_member_type, but it's what DWARF says, for some reason.
+ Attribute == dwarf::DW_AT_type) {
+ // ... has a DW_AT_name attribute,
+ StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name);
+ if (!Name.empty()) {
+ hashShallowTypeReference(Attribute, Entry, Name);
+ return;
+ }
+ }
+
+ unsigned &DieNumber = Numbering[&Entry];
+ if (DieNumber) {
+ hashRepeatedTypeReference(Attribute, DieNumber);
+ return;
+ }
+
+ // otherwise, b) use the letter 'T' as the marker, ...
+ addULEB128('T');
+
+ addULEB128(Attribute);
+
+ // ... process the type T recursively by performing Steps 2 through 7, and
+ // use the result as the attribute value.
+ DieNumber = Numbering.size();
+ computeHash(Entry);
+}
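
In short, a type-valued attribute is hashed in one of three ways (schematic, hypothetical operands):

//   'N' DW_AT_type <parent context of Foo> 'E' "Foo"   ; named pointee, hashed by name only
//   'R' DW_AT_type ULEB128(index)                      ; already-visited type, back-reference
//   'T' DW_AT_type <recursive hash of the referenced DIE>
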
+
+// Hash an individual attribute \p Attr based on the type of attribute and
+// the form.
+void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
+ const DIEValue *Value = Attr.Val;
+ const DIEAbbrevData *Desc = Attr.Desc;
+ dwarf::Attribute Attribute = Desc->getAttribute();
+
+ // 7.27 Step 3
+ // ... An attribute that refers to another type entry T is processed as
+ // follows:
+ if (const DIEEntry *EntryAttr = dyn_cast<DIEEntry>(Value)) {
+ hashDIEEntry(Attribute, Tag, *EntryAttr->getEntry());
+ return;
+ }
+
+ // Other attribute values use the letter 'A' as the marker, ...
+ addULEB128('A');
+
+ addULEB128(Attribute);
+
+ // ... and the value consists of the form code (encoded as an unsigned LEB128
+ // value) followed by the encoding of the value according to the form code. To
+ // ensure reproducibility of the signature, the set of forms used in the
+ // signature computation is limited to the following: DW_FORM_sdata,
+ // DW_FORM_flag, DW_FORM_string, and DW_FORM_block.
+ switch (Desc->getForm()) {
+ case dwarf::DW_FORM_string:
+ llvm_unreachable(
+ "Add support for DW_FORM_string if we ever start emitting them again");
+ case dwarf::DW_FORM_GNU_str_index:
+ case dwarf::DW_FORM_strp:
+ addULEB128(dwarf::DW_FORM_string);
+ addString(cast<DIEString>(Value)->getString());
+ break;
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_udata:
+ addULEB128(dwarf::DW_FORM_sdata);
+ addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue());
+ break;
+ default:
+ llvm_unreachable("Add support for additional forms");
+ }
+}
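
Concretely, the form normalization above means an integer attribute stored as DW_FORM_data4 hashes exactly as if it were DW_FORM_sdata, and a DW_FORM_strp name hashes as DW_FORM_string; with hypothetical values:

//   DW_AT_byte_size, DW_FORM_data4, 8
//     -> 'A' DW_AT_byte_size DW_FORM_sdata SLEB128(8)
//   DW_AT_name, DW_FORM_strp, "S"
//     -> 'A' DW_AT_name DW_FORM_string "S" '\0'
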
+
+// Go through the attributes from \p Attrs in the order specified in 7.27.4
+// and hash them.
+void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
+#define ADD_ATTR(ATTR) \
+ { \
+ if (ATTR.Val != 0) \
+ hashAttribute(ATTR, Tag); \
+ }
+
+ ADD_ATTR(Attrs.DW_AT_name);
+ ADD_ATTR(Attrs.DW_AT_accessibility);
+ ADD_ATTR(Attrs.DW_AT_address_class);
+ ADD_ATTR(Attrs.DW_AT_allocated);
+ ADD_ATTR(Attrs.DW_AT_artificial);
+ ADD_ATTR(Attrs.DW_AT_associated);
+ ADD_ATTR(Attrs.DW_AT_binary_scale);
+ ADD_ATTR(Attrs.DW_AT_bit_offset);
+ ADD_ATTR(Attrs.DW_AT_bit_size);
+ ADD_ATTR(Attrs.DW_AT_bit_stride);
+ ADD_ATTR(Attrs.DW_AT_byte_size);
+ ADD_ATTR(Attrs.DW_AT_byte_stride);
+ ADD_ATTR(Attrs.DW_AT_const_expr);
+ ADD_ATTR(Attrs.DW_AT_const_value);
+ ADD_ATTR(Attrs.DW_AT_containing_type);
+ ADD_ATTR(Attrs.DW_AT_count);
+ ADD_ATTR(Attrs.DW_AT_data_bit_offset);
+ ADD_ATTR(Attrs.DW_AT_data_location);
+ ADD_ATTR(Attrs.DW_AT_data_member_location);
+ ADD_ATTR(Attrs.DW_AT_decimal_scale);
+ ADD_ATTR(Attrs.DW_AT_decimal_sign);
+ ADD_ATTR(Attrs.DW_AT_default_value);
+ ADD_ATTR(Attrs.DW_AT_digit_count);
+ ADD_ATTR(Attrs.DW_AT_discr);
+ ADD_ATTR(Attrs.DW_AT_discr_list);
+ ADD_ATTR(Attrs.DW_AT_discr_value);
+ ADD_ATTR(Attrs.DW_AT_encoding);
+ ADD_ATTR(Attrs.DW_AT_enum_class);
+ ADD_ATTR(Attrs.DW_AT_endianity);
+ ADD_ATTR(Attrs.DW_AT_explicit);
+ ADD_ATTR(Attrs.DW_AT_is_optional);
+ ADD_ATTR(Attrs.DW_AT_location);
+ ADD_ATTR(Attrs.DW_AT_lower_bound);
+ ADD_ATTR(Attrs.DW_AT_mutable);
+ ADD_ATTR(Attrs.DW_AT_ordering);
+ ADD_ATTR(Attrs.DW_AT_picture_string);
+ ADD_ATTR(Attrs.DW_AT_prototyped);
+ ADD_ATTR(Attrs.DW_AT_small);
+ ADD_ATTR(Attrs.DW_AT_segment);
+ ADD_ATTR(Attrs.DW_AT_string_length);
+ ADD_ATTR(Attrs.DW_AT_threads_scaled);
+ ADD_ATTR(Attrs.DW_AT_upper_bound);
+ ADD_ATTR(Attrs.DW_AT_use_location);
+ ADD_ATTR(Attrs.DW_AT_use_UTF8);
+ ADD_ATTR(Attrs.DW_AT_variable_parameter);
+ ADD_ATTR(Attrs.DW_AT_virtuality);
+ ADD_ATTR(Attrs.DW_AT_visibility);
+ ADD_ATTR(Attrs.DW_AT_vtable_elem_location);
+ ADD_ATTR(Attrs.DW_AT_type);
+
+ // FIXME: Add the extended attributes.
+}
+
+// Add all of the attributes for \p Die to the hash.
+void DIEHash::addAttributes(const DIE &Die) {
+ DIEAttrs Attrs = {};
+ collectAttributes(Die, Attrs);
+ hashAttributes(Attrs, Die.getTag());
+}
+
+void DIEHash::hashNestedType(const DIE &Die, StringRef Name) {
+ // 7.27 Step 7
+ // ... append the letter 'S',
+ addULEB128('S');
+
+ // the tag of C,
+ addULEB128(Die.getTag());
+
+ // and the name.
+ addString(Name);
+}
+
+// Compute the hash of a DIE. This is based on the type signature computation
+// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a
+// flattened description of the DIE.
+void DIEHash::computeHash(const DIE &Die) {
+ // Append the letter 'D', followed by the DWARF tag of the DIE.
+ addULEB128('D');
+ addULEB128(Die.getTag());
+
+ // Add each of the attributes of the DIE.
+ addAttributes(Die);
+
+ // Then hash each of the children of the DIE.
+ for (std::vector<DIE *>::const_iterator I = Die.getChildren().begin(),
+ E = Die.getChildren().end();
+ I != E; ++I) {
+ // 7.27 Step 7
+ // If C is a nested type entry or a member function entry, ...
+ if (isType((*I)->getTag()) || (*I)->getTag() == dwarf::DW_TAG_subprogram) {
+ StringRef Name = getDIEStringAttr(**I, dwarf::DW_AT_name);
+ // ... and has a DW_AT_name attribute
+ if (!Name.empty()) {
+ hashNestedType(**I, Name);
+ continue;
+ }
+ }
+ computeHash(**I);
+ }
+
+ // Following the last child (or if there are no children), append a zero byte.
+ Hash.update(makeArrayRef((uint8_t)'\0'));
+}
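
Putting the pieces together, for a hypothetical struct S { int i; }; at file scope the flattened stream hashed by computeHash looks roughly like this (schematic only; attribute order follows the fixed list in hashAttributes, and the exact attributes depend on what the DIE carries):

//   'D' DW_TAG_structure_type
//     'A' DW_AT_name      DW_FORM_string "S"
//     'A' DW_AT_byte_size DW_FORM_sdata  4
//     'D' DW_TAG_member
//       'A' DW_AT_name                 DW_FORM_string "i"
//       'A' DW_AT_data_member_location DW_FORM_sdata  0
//       'T' DW_AT_type                 ; first visit, recurse into "int"
//       'D' DW_TAG_base_type
//         'A' DW_AT_name      DW_FORM_string "int"
//         'A' DW_AT_byte_size DW_FORM_sdata  4
//         'A' DW_AT_encoding  DW_FORM_sdata  DW_ATE_signed
//         0                   ; base type has no children
//       0                     ; member has no children
//     0                       ; end of the struct's children
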
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE
+/// with the exception that we are hashing only the context and the name of the
+/// type.
+uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
+
+ // Add the contexts to the hash. We won't be computing the ODR hash for
+ // function local types so it's safe to use the generic context hashing
+ // algorithm here.
+ // FIXME: If we figure out how to account for linkage in some way we could
+ // actually do this with a slight modification to the parent hash algorithm.
+ if (const DIE *Parent = Die.getParent())
+ addParentContext(*Parent);
+
+ // Add the current DIE information.
+
+ // Add the DWARF tag of the DIE.
+ addULEB128(Die.getTag());
+
+ // Add the name of the type to the hash.
+ addString(getDIEStringAttr(Die, dwarf::DW_AT_name));
+
+ // Now get the result.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little endian, so swap bytes
+ // appropriately.
+ return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+}
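
The final step, reading the low 8 bytes of the 16-byte MD5 result as a little-endian 64-bit value, can also be written portably without the LLVM support types (standalone sketch under that assumption):

#include <cstddef>
#include <cstdint>

// Build the 64-bit signature from digest bytes 8..15, treating byte 8 as the
// least significant; this matches reading those bytes as a little-endian
// integer.
static uint64_t signatureFromMD5(const uint8_t Digest[16]) {
  uint64_t Sig = 0;
  for (size_t i = 0; i != 8; ++i)
    Sig |= uint64_t(Digest[8 + i]) << (8 * i);
  return Sig;
}
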
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
+/// with the inclusion of the full CU and all top level CU entities.
+// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
+uint64_t DIEHash::computeCUSignature(const DIE &Die) {
+ Numbering.clear();
+ Numbering[&Die] = 1;
+
+ // Hash the DIE.
+ computeHash(Die);
+
+ // Now return the result.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little endian, so swap bytes
+ // appropriately.
+ return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+}
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
+/// with the inclusion of additional forms not specifically called out in the
+/// standard.
+uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
+ Numbering.clear();
+ Numbering[&Die] = 1;
+
+ if (const DIE *Parent = Die.getParent())
+ addParentContext(*Parent);
+
+ // Hash the DIE.
+ computeHash(Die);
+
+ // Now return the result.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little endian, so swap bytes
+ // appropriately.
+ return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+}
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
new file mode 100644
index 0000000..f0c4ef9
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -0,0 +1,147 @@
+//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for DWARF4 hashing of DIEs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+class CompileUnit;
+
+/// \brief An object that hashes DIEs and can add the resulting hash
+/// attributes onto a DIE.
+class DIEHash {
+ // The entry for a particular attribute.
+ struct AttrEntry {
+ const DIEValue *Val;
+ const DIEAbbrevData *Desc;
+ };
+
+ // Collection of all attributes used in hashing a particular DIE.
+ struct DIEAttrs {
+ AttrEntry DW_AT_name;
+ AttrEntry DW_AT_accessibility;
+ AttrEntry DW_AT_address_class;
+ AttrEntry DW_AT_allocated;
+ AttrEntry DW_AT_artificial;
+ AttrEntry DW_AT_associated;
+ AttrEntry DW_AT_binary_scale;
+ AttrEntry DW_AT_bit_offset;
+ AttrEntry DW_AT_bit_size;
+ AttrEntry DW_AT_bit_stride;
+ AttrEntry DW_AT_byte_size;
+ AttrEntry DW_AT_byte_stride;
+ AttrEntry DW_AT_const_expr;
+ AttrEntry DW_AT_const_value;
+ AttrEntry DW_AT_containing_type;
+ AttrEntry DW_AT_count;
+ AttrEntry DW_AT_data_bit_offset;
+ AttrEntry DW_AT_data_location;
+ AttrEntry DW_AT_data_member_location;
+ AttrEntry DW_AT_decimal_scale;
+ AttrEntry DW_AT_decimal_sign;
+ AttrEntry DW_AT_default_value;
+ AttrEntry DW_AT_digit_count;
+ AttrEntry DW_AT_discr;
+ AttrEntry DW_AT_discr_list;
+ AttrEntry DW_AT_discr_value;
+ AttrEntry DW_AT_encoding;
+ AttrEntry DW_AT_enum_class;
+ AttrEntry DW_AT_endianity;
+ AttrEntry DW_AT_explicit;
+ AttrEntry DW_AT_is_optional;
+ AttrEntry DW_AT_location;
+ AttrEntry DW_AT_lower_bound;
+ AttrEntry DW_AT_mutable;
+ AttrEntry DW_AT_ordering;
+ AttrEntry DW_AT_picture_string;
+ AttrEntry DW_AT_prototyped;
+ AttrEntry DW_AT_small;
+ AttrEntry DW_AT_segment;
+ AttrEntry DW_AT_string_length;
+ AttrEntry DW_AT_threads_scaled;
+ AttrEntry DW_AT_upper_bound;
+ AttrEntry DW_AT_use_location;
+ AttrEntry DW_AT_use_UTF8;
+ AttrEntry DW_AT_variable_parameter;
+ AttrEntry DW_AT_virtuality;
+ AttrEntry DW_AT_visibility;
+ AttrEntry DW_AT_vtable_elem_location;
+ AttrEntry DW_AT_type;
+
+ // Insert any additional ones here...
+ };
+
+public:
+ /// \brief Computes the ODR signature.
+ uint64_t computeDIEODRSignature(const DIE &Die);
+
+ /// \brief Computes the CU signature.
+ uint64_t computeCUSignature(const DIE &Die);
+
+ /// \brief Computes the type signature.
+ uint64_t computeTypeSignature(const DIE &Die);
+
+ // Helper routines to process parts of a DIE.
+private:
+ /// \brief Adds the parent context of \p Die to the hash.
+ void addParentContext(const DIE &Die);
+
+ /// \brief Adds the attributes of \p Die to the hash.
+ void addAttributes(const DIE &Die);
+
+ /// \brief Computes the full DWARF4 7.27 hash of the DIE.
+ void computeHash(const DIE &Die);
+
+ // Routines that add DIEValues to the hash.
+private:
+ /// \brief Encodes and adds \p Value to the hash as a ULEB128.
+ void addULEB128(uint64_t Value);
+
+ /// \brief Encodes and adds \p Value to the hash as a SLEB128.
+ void addSLEB128(int64_t Value);
+
+ /// \brief Adds \p Str to the hash and includes a NULL byte.
+ void addString(StringRef Str);
+
+ /// \brief Collects the attributes of DIE \p Die into the \p Attrs
+ /// structure.
+ void collectAttributes(const DIE &Die, DIEAttrs &Attrs);
+
+ /// \brief Hashes the attributes in \p Attrs in order.
+ void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag);
+
+ /// \brief Hashes an individual attribute.
+ void hashAttribute(AttrEntry Attr, dwarf::Tag Tag);
+
+ /// \brief Hashes an attribute that refers to another DIE.
+ void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+ const DIE &Entry);
+
+ /// \brief Hashes a reference to a named type in a way that is
+ /// independent of whether that type is described by a declaration or a
+ /// definition.
+ void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry,
+ StringRef Name);
+
+ /// \brief Hashes a reference to a previously referenced type DIE.
+ void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber);
+
+ void hashNestedType(const DIE &Die, StringRef Name);
+
+private:
+ MD5 Hash;
+ DenseMap<const DIE *, unsigned> Numbering;
+};
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index a82a149..689aeda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -24,27 +24,14 @@
using namespace llvm;
-const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
- switch (AT) {
- case eAtomTypeNULL: return "eAtomTypeNULL";
- case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
- case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
- case eAtomTypeTag: return "eAtomTypeTag";
- case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
- case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
- }
- llvm_unreachable("invalid AtomType!");
-}
-
// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
-DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) :
- Header(8 + (atomList.size() * 4)),
- HeaderData(atomList),
- Entries(Allocator) { }
+DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
+ : Header(8 + (atomList.size() * 4)), HeaderData(atomList),
+ Entries(Allocator) {}
-DwarfAccelTable::~DwarfAccelTable() { }
+DwarfAccelTable::~DwarfAccelTable() {}
-void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) {
assert(Data.empty() && "Already finalized!");
// If the string is in the list already then add this die to the list
// otherwise add a new one.
@@ -59,13 +46,16 @@ void DwarfAccelTable::ComputeBucketCount(void) {
uniques[i] = Data[i]->HashValue;
array_pod_sort(uniques.begin(), uniques.end());
std::vector<uint32_t>::iterator p =
- std::unique(uniques.begin(), uniques.end());
+ std::unique(uniques.begin(), uniques.end());
uint32_t num = std::distance(uniques.begin(), p);
// Then compute the bucket size, minimum of 1 bucket.
- if (num > 1024) Header.bucket_count = num/4;
- if (num > 16) Header.bucket_count = num/2;
- else Header.bucket_count = num > 0 ? num : 1;
+ if (num > 1024)
+ Header.bucket_count = num / 4;
+ if (num > 16)
+ Header.bucket_count = num / 2;
+ else
+ Header.bucket_count = num > 0 ? num : 1;
Header.hashes_count = num;
}
@@ -78,13 +68,13 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
// Create the individual hash data outputs.
- for (StringMap<DataArray>::iterator
- EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
+ EI != EE; ++EI) {
// Unique the entries.
std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs);
EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
- EI->second.end());
+ EI->second.end());
HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
Data.push_back(Entry);
@@ -126,7 +116,7 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
Asm->EmitInt32(HeaderData.Atoms.size());
for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
Atom A = HeaderData.Atoms[i];
- Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+ Asm->OutStreamer.AddComment(dwarf::AtomTypeString(A.type));
Asm->EmitInt16(A.type);
Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
Asm->EmitInt16(A.form);
@@ -152,7 +142,8 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
- HE = Buckets[i].end(); HI != HE; ++HI) {
+ HE = Buckets[i].end();
+ HI != HE; ++HI) {
Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
Asm->EmitInt32((*HI)->HashValue);
}
@@ -166,13 +157,13 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
- HE = Buckets[i].end(); HI != HE; ++HI) {
+ HE = Buckets[i].end();
+ HI != HE; ++HI) {
Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
MCContext &Context = Asm->OutStreamer.getContext();
- const MCExpr *Sub =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
- MCSymbolRefExpr::Create(SecBegin, Context),
- Context);
+ const MCExpr *Sub = MCBinaryExpr::CreateSub(
+ MCSymbolRefExpr::Create((*HI)->Sym, Context),
+ MCSymbolRefExpr::Create(SecBegin, Context), Context);
Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t));
}
}
@@ -185,7 +176,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
- HE = Buckets[i].end(); HI != HE; ++HI) {
+ HE = Buckets[i].end();
+ HI != HE; ++HI) {
// Remember to emit the label for our offset.
Asm->OutStreamer.EmitLabel((*HI)->Sym);
Asm->OutStreamer.AddComment((*HI)->Str);
@@ -193,8 +185,9 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
D->getStringPoolSym());
Asm->OutStreamer.AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.size());
- for (ArrayRef<HashDataContents*>::const_iterator
- DI = (*HI)->Data.begin(), DE = (*HI)->Data.end();
+ for (ArrayRef<HashDataContents *>::const_iterator
+ DI = (*HI)->Data.begin(),
+ DE = (*HI)->Data.end();
DI != DE; ++DI) {
// Emit the DIE offset
Asm->EmitInt32((*DI)->Die->getOffset());
@@ -214,8 +207,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
}
// Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
- DwarfUnits *D) {
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) {
// Emit the header.
EmitHeader(Asm);
@@ -239,11 +231,12 @@ void DwarfAccelTable::print(raw_ostream &O) {
HeaderData.print(O);
O << "Entries: \n";
- for (StringMap<DataArray>::const_iterator
- EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ for (StringMap<DataArray>::const_iterator EI = Entries.begin(),
+ EE = Entries.end();
+ EI != EE; ++EI) {
O << "Name: " << EI->getKeyData() << "\n";
for (DataArray::const_iterator DI = EI->second.begin(),
- DE = EI->second.end();
+ DE = EI->second.end();
DI != DE; ++DI)
(*DI)->print(O);
}
@@ -251,14 +244,14 @@ void DwarfAccelTable::print(raw_ostream &O) {
O << "Buckets and Hashes: \n";
for (size_t i = 0, e = Buckets.size(); i < e; ++i)
for (HashList::const_iterator HI = Buckets[i].begin(),
- HE = Buckets[i].end(); HI != HE; ++HI)
+ HE = Buckets[i].end();
+ HI != HE; ++HI)
(*HI)->print(O);
O << "Data: \n";
- for (std::vector<HashData*>::const_iterator
- DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
- (*DI)->print(O);
-
-
+ for (std::vector<HashData *>::const_iterator DI = Data.begin(),
+ DE = Data.end();
+ DI != DE; ++DI)
+ (*DI)->print(O);
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 3ef1dc5..7627313 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -67,11 +67,7 @@ class DwarfUnits;
class DwarfAccelTable {
- enum HashFunctionType {
- eHashFunctionDJB = 0u
- };
-
- static uint32_t HashDJB (StringRef Str) {
+ static uint32_t HashDJB(StringRef Str) {
uint32_t h = 5381;
for (unsigned i = 0, e = Str.size(); i != e; ++i)
h = ((h << 5) + h) + Str[i];
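
The hash above is the classic Bernstein (DJB2) string hash, h = h * 33 + c seeded with 5381; a standalone equivalent for reference (illustrative only, not part of the patch):

#include <cstdint>
#include <string>

// Same recurrence as HashDJB: fold each byte in with h = h * 33 + c,
// where (h << 5) + h is just h * 33.
static uint32_t djbHash(const std::string &Str) {
  uint32_t H = 5381;
  for (unsigned char C : Str)
    H = H * 33 + C;
  return H;
}
// e.g. djbHash("") == 5381 and djbHash("a") == 5381 * 33 + 'a' == 177670.
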
@@ -80,25 +76,25 @@ class DwarfAccelTable {
// Helper function to compute the number of buckets needed based on
// the number of unique hashes.
- void ComputeBucketCount (void);
+ void ComputeBucketCount(void);
struct TableHeader {
- uint32_t magic; // 'HASH' magic value to allow endian detection
- uint16_t version; // Version number.
- uint16_t hash_function; // The hash function enumeration that was used.
- uint32_t bucket_count; // The number of buckets in this hash table.
- uint32_t hashes_count; // The total number of unique hash values
- // and hash data offsets in this table.
- uint32_t header_data_len; // The bytes to skip to get to the hash
- // indexes (buckets) for correct alignment.
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number.
+ uint16_t hash_function; // The hash function enumeration that was used.
+ uint32_t bucket_count; // The number of buckets in this hash table.
+ uint32_t hashes_count; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
// Also written to disk is the implementation specific header data.
static const uint32_t MagicHash = 0x48415348;
- TableHeader (uint32_t data_len) :
- magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
- bucket_count (0), hashes_count (0), header_data_len (data_len)
- {}
+ TableHeader(uint32_t data_len)
+ : magic(MagicHash), version(1),
+ hash_function(dwarf::DW_hash_function_djb), bucket_count(0),
+ hashes_count(0), header_data_len(data_len) {}
#ifndef NDEBUG
void print(raw_ostream &O) {
@@ -124,62 +120,38 @@ public:
// uint32_t die_offset_base
// uint32_t atom_count
// atom_count Atoms
- enum AtomType {
- eAtomTypeNULL = 0u,
- eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
- eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that
- // contains the item in question
- eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as
- // DW_FORM_data1 (if no tags exceed 255) or
- // DW_FORM_data2.
- eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
- eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags
- };
-
- enum TypeFlags {
- eTypeFlagClassMask = 0x0000000fu,
-
- // Always set for C++, only set for ObjC if this is the
- // @implementation for a class.
- eTypeFlagClassIsImplementation = ( 1u << 1 )
- };
// Make these public so that they can be used as a general interface to
// the class.
struct Atom {
- AtomType type; // enum AtomType
+ uint16_t type; // enum AtomType
uint16_t form; // DWARF DW_FORM_ defines
- Atom(AtomType type, uint16_t form) : type(type), form(form) {}
- static const char * AtomTypeString(enum AtomType);
+ Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
#ifndef NDEBUG
void print(raw_ostream &O) {
- O << "Type: " << AtomTypeString(type) << "\n"
+ O << "Type: " << dwarf::AtomTypeString(type) << "\n"
<< "Form: " << dwarf::FormEncodingString(form) << "\n";
}
- void dump() {
- print(dbgs());
- }
+ void dump() { print(dbgs()); }
#endif
};
- private:
+private:
struct TableHeaderData {
uint32_t die_offset_base;
SmallVector<Atom, 1> Atoms;
TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
- : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { }
+ : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
#ifndef NDEBUG
- void print (raw_ostream &O) {
+ void print(raw_ostream &O) {
O << "die_offset_base: " << die_offset_base << "\n";
for (size_t i = 0; i < Atoms.size(); i++)
Atoms[i].print(O);
}
- void dump() {
- print(dbgs());
- }
+ void dump() { print(dbgs()); }
#endif
};
@@ -193,37 +165,38 @@ public:
// HashData[hash_data_count]
public:
struct HashDataContents {
- DIE *Die; // Offsets
+ DIE *Die; // Offsets
char Flags; // Specific flags to output
- HashDataContents(DIE *D, char Flags) :
- Die(D),
- Flags(Flags) { }
- #ifndef NDEBUG
+ HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {}
+#ifndef NDEBUG
void print(raw_ostream &O) const {
O << " Offset: " << Die->getOffset() << "\n";
O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
O << " Flags: " << Flags << "\n";
}
- #endif
+#endif
};
+
private:
struct HashData {
StringRef Str;
uint32_t HashValue;
MCSymbol *Sym;
- ArrayRef<HashDataContents*> Data; // offsets
- HashData(StringRef S, ArrayRef<HashDataContents*> Data)
- : Str(S), Data(Data) {
+ ArrayRef<HashDataContents *> Data; // offsets
+ HashData(StringRef S, ArrayRef<HashDataContents *> Data)
+ : Str(S), Data(Data) {
HashValue = DwarfAccelTable::HashDJB(S);
}
- #ifndef NDEBUG
+#ifndef NDEBUG
void print(raw_ostream &O) {
O << "Name: " << Str << "\n";
O << " Hash Value: " << format("0x%x", HashValue) << "\n";
- O << " Symbol: " ;
- if (Sym) Sym->print(O);
- else O << "<none>";
+ O << " Symbol: ";
+ if (Sym)
+ Sym->print(O);
+ else
+ O << "<none>";
O << "\n";
for (size_t i = 0; i < Data.size(); i++) {
O << " Offset: " << Data[i]->Die->getOffset() << "\n";
@@ -231,14 +204,12 @@ private:
O << " Flags: " << Data[i]->Flags << "\n";
}
}
- void dump() {
- print(dbgs());
- }
- #endif
+ void dump() { print(dbgs()); }
+#endif
};
- DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
- void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
+ DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION;
+ void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION;
// Internal Functions
void EmitHeader(AsmPrinter *);
@@ -253,24 +224,24 @@ private:
// Output Variables
TableHeader Header;
TableHeaderData HeaderData;
- std::vector<HashData*> Data;
+ std::vector<HashData *> Data;
// String Data
- typedef std::vector<HashDataContents*> DataArray;
- typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries;
+ typedef std::vector<HashDataContents *> DataArray;
+ typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries;
StringEntries Entries;
// Buckets/Hashes/Offsets
- typedef std::vector<HashData*> HashList;
+ typedef std::vector<HashData *> HashList;
typedef std::vector<HashList> BucketList;
BucketList Buckets;
HashList Hashes;
// Public Implementation
- public:
+public:
DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
~DwarfAccelTable();
- void AddName(StringRef, DIE*, char = 0);
+ void AddName(StringRef, DIE *, char = 0);
void FinalizeTable(AsmPrinter *, StringRef);
void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
#ifndef NDEBUG
@@ -278,6 +249,5 @@ private:
void dump() { print(dbgs()); }
#endif
};
-
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index fec5ced..8918f3d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -68,7 +68,7 @@ void DwarfCFIException::EndModule() {
for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
if (!Personalities[i])
continue;
- MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]);
+ MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
AtLeastOne = true;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index df8ca17..a6ff953 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -22,8 +22,8 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -33,12 +33,12 @@
using namespace llvm;
/// CompileUnit - Compile unit constructor.
-CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, const MDNode *N,
+CompileUnit::CompileUnit(unsigned UID, DIE *D, DICompileUnit Node,
AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU)
- : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
- IndexTyDie(0), DebugInfoOffset(0) {
+ : UniqueID(UID), Node(Node), CUDie(D), Asm(A), DD(DW), DU(DWU),
+ IndexTyDie(0), DebugInfoOffset(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
- insertDIE(N, D);
+ insertDIE(Node, D);
}
/// ~CompileUnit - Destructor for compile unit.
@@ -57,7 +57,7 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
/// getDefaultLowerBound - Return the default lower bound for an array. If the
/// DWARF version doesn't handle the language, return -1.
int64_t CompileUnit::getDefaultLowerBound() const {
- switch (Language) {
+ switch (getLanguage()) {
default:
break;
@@ -98,32 +98,71 @@ int64_t CompileUnit::getDefaultLowerBound() const {
return -1;
}
+/// Check whether the DIE for this MDNode can be shared across CUs.
+static bool isShareableAcrossCUs(DIDescriptor D) {
+ // When the MDNode can be part of the type system, the DIE can be
+ // shared across CUs.
+ return D.isType() ||
+ (D.isSubprogram() && !DISubprogram(D).isDefinition());
+}
+
+/// getDIE - Returns the debug information entry for the specified
+/// debug descriptor. We delegate the request to DwarfDebug
+/// when the DIE for this MDNode can be shared across CUs. The mappings
+/// will be kept in DwarfDebug for shareable DIEs.
+DIE *CompileUnit::getDIE(DIDescriptor D) const {
+ if (isShareableAcrossCUs(D))
+ return DD->getDIE(D);
+ return MDNodeToDieMap.lookup(D);
+}
+
+/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug
+/// when the DIE for this MDNode can be shared across CUs. The mappings
+/// will be kept in DwarfDebug for shareable DIEs.
+void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) {
+ if (isShareableAcrossCUs(Desc)) {
+ DD->insertDIE(Desc, D);
+ return;
+ }
+ MDNodeToDieMap.insert(std::make_pair(Desc, D));
+}
+
/// addFlag - Add a flag that is true.
-void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
- if (!DD->useDarwinGDBCompat())
- Die->addValue(Attribute, dwarf::DW_FORM_flag_present,
- DIEIntegerOne);
+void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) {
+ if (DD->getDwarfVersion() >= 4)
+ Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne);
else
- addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1);
+ Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne);
}
/// addUInt - Add an unsigned integer attribute data and value.
///
-void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
- unsigned Form, uint64_t Integer) {
- if (!Form) Form = DIEInteger::BestForm(false, Integer);
- DIEValue *Value = Integer == 1 ?
- DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
- Die->addValue(Attribute, Form, Value);
+void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, uint64_t Integer) {
+ if (!Form)
+ Form = DIEInteger::BestForm(false, Integer);
+ DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator)
+ DIEInteger(Integer);
+ Die->addValue(Attribute, *Form, Value);
+}
+
+void CompileUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) {
+ addUInt(Block, (dwarf::Attribute)0, Form, Integer);
}
 /// addSInt - Add a signed integer attribute data and value.
///
-void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
- unsigned Form, int64_t Integer) {
- if (!Form) Form = DIEInteger::BestForm(true, Integer);
+void CompileUnit::addSInt(DIE *Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, int64_t Integer) {
+ if (!Form)
+ Form = DIEInteger::BestForm(true, Integer);
DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
- Die->addValue(Attribute, Form, Value);
+ Die->addValue(Attribute, *Form, Value);
+}
+
+void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form,
+ int64_t Integer) {
+ addSInt(Die, (dwarf::Attribute)0, Form, Integer);
}
/// addString - Add a string attribute data and value. We always emit a
@@ -131,9 +170,10 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
/// more predictable sizes. In the case of split dwarf we emit an index
/// into another table which gets us the static offset into the string
/// table.
-void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
+void CompileUnit::addString(DIE *Die, dwarf::Attribute Attribute,
+ StringRef String) {
DIEValue *Value;
- unsigned Form;
+ dwarf::Form Form;
if (!DD->useSplitDwarf()) {
MCSymbol *Symb = DU->getStringPoolEntry(String);
if (Asm->needsRelocationsForDwarfStringPool())
@@ -154,7 +194,7 @@ void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
/// addLocalString - Add a string attribute data and value. This is guaranteed
/// to be in the local string pool instead of indirected.
-void CompileUnit::addLocalString(DIE *Die, unsigned Attribute,
+void CompileUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute,
StringRef String) {
MCSymbol *Symb = DU->getStringPoolEntry(String);
DIEValue *Value;
@@ -169,25 +209,32 @@ void CompileUnit::addLocalString(DIE *Die, unsigned Attribute,
/// addExpr - Add a Dwarf expression attribute data and value.
///
-void CompileUnit::addExpr(DIE *Die, unsigned Attribute, unsigned Form,
- const MCExpr *Expr) {
+void CompileUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) {
DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
- Die->addValue(Attribute, Form, Value);
+ Die->addValue((dwarf::Attribute)0, Form, Value);
}
/// addLabel - Add a Dwarf label attribute data and value.
///
-void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
- const MCSymbol *Label) {
+void CompileUnit::addLabel(DIE *Die, dwarf::Attribute Attribute,
+ dwarf::Form Form, const MCSymbol *Label) {
DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
Die->addValue(Attribute, Form, Value);
}
+void CompileUnit::addLabel(DIEBlock *Die, dwarf::Form Form,
+ const MCSymbol *Label) {
+ addLabel(Die, (dwarf::Attribute)0, Form, Label);
+}
+
/// addLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr or DW_FORM_GNU_addr_index.
///
-void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute,
+void CompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute,
MCSymbol *Label) {
+ if (Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
if (!DD->useSplitDwarf()) {
if (Label != NULL) {
DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
@@ -206,34 +253,60 @@ void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute,
/// addOpAddress - Add a dwarf op address data and value using the
/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
///
-void CompileUnit::addOpAddress(DIE *Die, const MCSymbol *Sym) {
+void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) {
+ DD->addArangeLabel(SymbolCU(this, Sym));
if (!DD->useSplitDwarf()) {
- addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Die, 0, dwarf::DW_FORM_udata, Sym);
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, dwarf::DW_FORM_udata, Sym);
} else {
- addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
- addUInt(Die, 0, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym));
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+ addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym));
}
}
/// addDelta - Add a label delta attribute data and value.
///
-void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
- const MCSymbol *Hi, const MCSymbol *Lo) {
+void CompileUnit::addDelta(DIE *Die, dwarf::Attribute Attribute,
+ dwarf::Form Form, const MCSymbol *Hi,
+ const MCSymbol *Lo) {
DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
Die->addValue(Attribute, Form, Value);
}
/// addDIEEntry - Add a DIE attribute data and value.
///
-void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute,
DIE *Entry) {
- Die->addValue(Attribute, Form, createDIEEntry(Entry));
+ addDIEEntry(Die, Attribute, createDIEEntry(Entry));
+}
+
+void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute,
+ DIEEntry *Entry) {
+ const DIE *DieCU = Die->getCompileUnitOrNull();
+ const DIE *EntryCU = Entry->getEntry()->getCompileUnitOrNull();
+ if (!DieCU)
+ // We assume that Die belongs to this CU if it is not linked to any CU yet.
+ DieCU = getCUDie();
+ if (!EntryCU)
+ EntryCU = getCUDie();
+ Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4
+ : dwarf::DW_FORM_ref_addr,
+ Entry);
+}
+
+/// Create a DIE with the given Tag, add the DIE to its parent, and
+/// call insertDIE if MD is not null.
+DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
+ DIE *Die = new DIE(Tag);
+ Parent.addChild(Die);
+ if (N)
+ insertDIE(N, Die);
+ return Die;
}
/// addBlock - Add block data.
///
-void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute,
DIEBlock *Block) {
Block->ComputeSize(Asm);
DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
@@ -250,12 +323,12 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
unsigned Line = V.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
- V.getContext().getDirectory(),
- getUniqueID());
+ unsigned FileID =
+ DD->getOrCreateSourceID(V.getContext().getFilename(),
+ V.getContext().getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
- addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
/// addSourceLine - Add location information to specified debug information
@@ -268,11 +341,11 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(),
- getUniqueID());
+ unsigned FileID =
+ DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
- addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
/// addSourceLine - Add location information to specified debug information
@@ -287,11 +360,11 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
if (Line == 0)
return;
- unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
- SP.getDirectory(), getUniqueID());
+ unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
- addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
/// addSourceLine - Add location information to specified debug information
@@ -304,11 +377,11 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
unsigned Line = Ty.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
- Ty.getDirectory(), getUniqueID());
+ unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
- addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
/// addSourceLine - Add location information to specified debug information
@@ -325,8 +398,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
File.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
- addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
/// addSourceLine - Add location information to specified debug information
@@ -341,11 +414,11 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
return;
StringRef FN = NS.getFilename();
- unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(),
- getUniqueID());
+ unsigned FileID =
+ DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
- addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
- addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+ addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
/// addVariableAddress - Add DW_AT_location attribute for a
@@ -362,38 +435,38 @@ void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die,
}
/// addRegisterOp - Add register operand.
-void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) {
+void CompileUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) {
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
unsigned DWReg = RI->getDwarfRegNum(Reg, false);
if (DWReg < 32)
- addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
else {
- addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
- addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(TheDie, dwarf::DW_FORM_udata, DWReg);
}
}
/// addRegisterOffset - Add register offset.
-void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg,
+void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg,
int64_t Offset) {
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
unsigned DWReg = RI->getDwarfRegNum(Reg, false);
const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
if (Reg == TRI->getFrameRegister(*Asm->MF))
// If variable offset is based in frame register then use fbreg.
- addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
else if (DWReg < 32)
- addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
else {
- addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
- addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(TheDie, dwarf::DW_FORM_udata, DWReg);
}
- addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset);
+ addSInt(TheDie, dwarf::DW_FORM_sdata, Offset);
}
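
For illustration, the blocks built by addRegisterOp and addRegisterOffset are ordinary DWARF location expressions; once emitted, typical cases look like this (hypothetical operand values):

//   frame-base offset -8:  DW_OP_fbreg SLEB128(-8)   -> bytes 0x91 0x78
//   DWARF register 3:      DW_OP_reg3                -> byte  0x53
//   DWARF register 40:     DW_OP_regx ULEB128(40)    -> bytes 0x90 0x28
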
/// addAddress - Add an address attribute to a die based on the location
/// provided.
-void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
+void CompileUnit::addAddress(DIE *Die, dwarf::Attribute Attribute,
const MachineLocation &Location, bool Indirect) {
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
@@ -402,12 +475,12 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
else {
addRegisterOffset(Block, Location.getReg(), Location.getOffset());
if (Indirect && !Location.isReg()) {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
}
}
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, 0, Block);
+ addBlock(Die, Attribute, Block);
}
/// addComplexAddress - Start with the address based on the location provided,
@@ -416,7 +489,7 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
/// the starting location. Add the DWARF information to the die.
///
void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die,
- unsigned Attribute,
+ dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
unsigned N = DV.getNumAddrElements();
@@ -429,23 +502,23 @@ void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die,
i = 2;
} else
addRegisterOp(Block, Location.getReg());
- }
- else
+ } else
addRegisterOffset(Block, Location.getReg(), Location.getOffset());
- for (;i < N; ++i) {
+ for (; i < N; ++i) {
uint64_t Element = DV.getAddrElement(i);
if (Element == DIBuilder::OpPlus) {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Block, 0, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
} else if (Element == DIBuilder::OpDeref) {
if (!Location.isReg())
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- } else llvm_unreachable("unknown DIBuilder Opcode");
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else
+ llvm_unreachable("unknown DIBuilder Opcode");
}
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, 0, Block);
+ addBlock(Die, Attribute, Block);
}
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
@@ -509,44 +582,41 @@ void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die,
/// more information, read large comment just above here.
///
void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
- unsigned Attribute,
+ dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIType Ty = DV.getType();
DIType TmpTy = Ty;
- unsigned Tag = Ty.getTag();
+ uint16_t Tag = Ty.getTag();
bool isPointer = false;
StringRef varName = DV.getName();
if (Tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy = DIDerivedType(Ty);
- TmpTy = DTy.getTypeDerivedFrom();
+ DIDerivedType DTy(Ty);
+ TmpTy = resolve(DTy.getTypeDerivedFrom());
isPointer = true;
}
- DICompositeType blockStruct = DICompositeType(TmpTy);
+ DICompositeType blockStruct(TmpTy);
// Find the __forwarding field and the variable field in the __Block_byref
// struct.
DIArray Fields = blockStruct.getTypeArray();
- DIDescriptor varField = DIDescriptor();
- DIDescriptor forwardingField = DIDescriptor();
+ DIDerivedType varField;
+ DIDerivedType forwardingField;
for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Fields.getElement(i);
- DIDerivedType DT = DIDerivedType(Element);
+ DIDerivedType DT(Fields.getElement(i));
StringRef fieldName = DT.getName();
if (fieldName == "__forwarding")
- forwardingField = Element;
+ forwardingField = DT;
else if (fieldName == varName)
- varField = Element;
+ varField = DT;
}
// Get the offsets for the forwarding field and the variable field.
- unsigned forwardingFieldOffset =
- DIDerivedType(forwardingField).getOffsetInBits() >> 3;
- unsigned varFieldOffset =
- DIDerivedType(varField).getOffsetInBits() >> 3;
+ unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3;
+ unsigned varFieldOffset = varField.getOffsetInBits() >> 3;
// Decode the original location, and use that as the start of the byref
// variable's location.
@@ -560,45 +630,91 @@ void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
// Next add the offset for the '__forwarding' field:
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset);
}
// Now dereference the __forwarding field to get to the real __Block_byref
// struct: DW_OP_deref.
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
// Now that we've got the real __Block_byref... struct, add the offset
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset);
}
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, 0, Block);
+ addBlock(Die, Attribute, Block);
}
/// isTypeSigned - Return true if the type is signed.
-static bool isTypeSigned(DIType Ty, int *SizeInBits) {
+static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) {
if (Ty.isDerivedType())
- return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits);
+ return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()),
+ SizeInBits);
if (Ty.isBasicType())
- if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed
- || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) {
+ if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed ||
+ DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) {
*SizeInBits = Ty.getSizeInBits();
return true;
}
return false;
}
+/// Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) {
+ DIDerivedType DTy(Ty);
+ if (DTy.isDerivedType())
+ return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom()));
+
+ DIBasicType BTy(Ty);
+ if (BTy.isBasicType()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_boolean)
+ return true;
+ }
+ return false;
+}
+
+/// If this type is derived from a base type then return base type size.
+static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) {
+ unsigned Tag = Ty.getTag();
+
+ if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
+ Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_restrict_type)
+ return Ty.getSizeInBits();
+
+ DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom());
+
+ // If this type is not derived from any type, take a conservative approach.
+ if (!BaseType.isValid())
+ return Ty.getSizeInBits();
+
+ // If this is a derived type, go ahead and get the base type, unless it's a
+ // reference, in which case it's just the size of the field. Pointer types
+ // need no such handling since they're a different kind of type qualification.
+ if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
+ BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
+ return Ty.getSizeInBits();
+
+ if (BaseType.isDerivedType())
+ return getBaseTypeSize(DD, DIDerivedType(BaseType));
+
+ return BaseType.getSizeInBits();
+}
+
/// addConstantValue - Add constant value entry in variable DIE.
void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
DIType Ty) {
@@ -606,32 +722,47 @@ void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
// their maximum bit width which is a bit unfortunate (& doesn't prefer
// udata/sdata over dataN as suggested by the DWARF spec)
assert(MO.isImm() && "Invalid machine operand!");
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
int SizeInBits = -1;
- bool SignedConstant = isTypeSigned(Ty, &SizeInBits);
- unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata;
- switch (SizeInBits) {
- case 8: Form = dwarf::DW_FORM_data1; break;
- case 16: Form = dwarf::DW_FORM_data2; break;
- case 32: Form = dwarf::DW_FORM_data4; break;
- case 64: Form = dwarf::DW_FORM_data8; break;
- default: break;
+ bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits);
+ dwarf::Form Form;
+
+ // If we're a signed constant definitely use sdata.
+ if (SignedConstant) {
+ addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm());
+ return;
}
- SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
- : addUInt(Block, 0, Form, MO.getImm());
- addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ // Else use data for now unless it's larger than we can deal with.
+ switch (SizeInBits) {
+ case 8:
+ Form = dwarf::DW_FORM_data1;
+ break;
+ case 16:
+ Form = dwarf::DW_FORM_data2;
+ break;
+ case 32:
+ Form = dwarf::DW_FORM_data4;
+ break;
+ case 64:
+ Form = dwarf::DW_FORM_data8;
+ break;
+ default:
+ Form = dwarf::DW_FORM_udata;
+ addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm());
+ return;
+ }
+ addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm());
}
/// addConstantFPValue - Add constant value entry in variable DIE.
void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
- assert (MO.isFPImm() && "Invalid machine operand!");
+ assert(MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
APFloat FPImm = MO.getFPImm()->getValueAPF();
// Get the raw data form of the floating point.
const APInt FltVal = FPImm.bitcastToAPInt();
- const char *FltPtr = (const char*)FltVal.getRawData();
+ const char *FltPtr = (const char *)FltVal.getRawData();
int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
bool LittleEndian = Asm->getDataLayout().isLittleEndian();
@@ -641,15 +772,15 @@ void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
// Output the constant to DWARF one byte at a time.
for (; Start != Stop; Start += Incr)
- addUInt(Block, 0, dwarf::DW_FORM_data1,
- (unsigned char)0xFF & FltPtr[Start]);
+ addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]);
- addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ addBlock(Die, dwarf::DW_AT_const_value, Block);
}
/// addConstantFPValue - Add constant value entry in variable DIE.
void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
- addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false);
+ // Pass this down to addConstantValue as an unsigned bag of bits.
+ addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
}
/// addConstantValue - Add constant value entry in variable DIE.
@@ -662,19 +793,34 @@ void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) {
unsigned CIBitWidth = Val.getBitWidth();
if (CIBitWidth <= 64) {
- unsigned form = 0;
+ // If we're a signed constant, definitely use sdata.
+ if (!Unsigned) {
+ addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+ Val.getSExtValue());
+ return;
+ }
+
+ // Else use data for now unless it's larger than we can deal with.
+ dwarf::Form Form;
switch (CIBitWidth) {
- case 8: form = dwarf::DW_FORM_data1; break;
- case 16: form = dwarf::DW_FORM_data2; break;
- case 32: form = dwarf::DW_FORM_data4; break;
- case 64: form = dwarf::DW_FORM_data8; break;
+ case 8:
+ Form = dwarf::DW_FORM_data1;
+ break;
+ case 16:
+ Form = dwarf::DW_FORM_data2;
+ break;
+ case 32:
+ Form = dwarf::DW_FORM_data4;
+ break;
+ case 64:
+ Form = dwarf::DW_FORM_data8;
+ break;
default:
- form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
+ addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ Val.getZExtValue());
+ return;
}
- if (Unsigned)
- addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue());
- else
- addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue());
+ addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue());
return;
}
@@ -693,10 +839,10 @@ void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) {
c = Ptr64[i / 8] >> (8 * (i & 7));
else
c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
- addUInt(Block, 0, dwarf::DW_FORM_data1, c);
+ addUInt(Block, dwarf::DW_FORM_data1, c);
}
- addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ addBlock(Die, dwarf::DW_AT_const_value, Block);
}
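Constants wider than 64 bits take the block path above: each byte is appended as DW_FORM_data1 in target byte order. A self-contained sketch of that byte-splitting loop, assuming the value arrives as 64-bit words with the least significant word first (the layout APInt::getRawData returns); constantBlockBytes is illustrative only:

#include <cstdint>
#include <vector>

std::vector<uint8_t> constantBlockBytes(const uint64_t *Words, unsigned BitWidth,
                                        bool TargetLittleEndian) {
  unsigned NumBytes = BitWidth / 8;
  std::vector<uint8_t> Out;
  for (unsigned i = 0; i != NumBytes; ++i) {
    uint8_t c;
    if (TargetLittleEndian)
      c = uint8_t(Words[i / 8] >> (8 * (i & 7)));
    else
      c = uint8_t(Words[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)));
    Out.push_back(c);   // one DW_FORM_data1 value per byte
  }
  return Out;
}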
/// addTemplateParams - Add template parameters into buffer.
@@ -705,47 +851,48 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
DIDescriptor Element = TParams.getElement(i);
if (Element.isTemplateTypeParameter())
- Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
- DITemplateTypeParameter(Element)));
+ constructTemplateTypeParameterDIE(Buffer,
+ DITemplateTypeParameter(Element));
else if (Element.isTemplateValueParameter())
- Buffer.addChild(getOrCreateTemplateValueParameterDIE(
- DITemplateValueParameter(Element)));
+ constructTemplateValueParameterDIE(Buffer,
+ DITemplateValueParameter(Element));
}
}
/// getOrCreateContextDIE - Get context owner's DIE.
-DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
+DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) {
+ if (!Context || Context.isFile())
+ return getCUDie();
if (Context.isType())
return getOrCreateTypeDIE(DIType(Context));
- else if (Context.isNameSpace())
+ if (Context.isNameSpace())
return getOrCreateNameSpace(DINameSpace(Context));
- else if (Context.isSubprogram())
+ if (Context.isSubprogram())
return getOrCreateSubprogramDIE(DISubprogram(Context));
- else
- return getDIE(Context);
-}
-
-/// addToContextOwner - Add Die into the list of its context owner's children.
-void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
- if (DIE *ContextDIE = getOrCreateContextDIE(Context))
- ContextDIE->addChild(Die);
- else
- addDie(Die);
+ return getDIE(Context);
}
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
- DIType Ty(TyNode);
- if (!Ty.isType())
+ if (!TyNode)
return NULL;
+
+ DIType Ty(TyNode);
+ assert(Ty.isType());
+
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(Ty.getContext()));
+ assert(ContextDIE);
+
DIE *TyDIE = getDIE(Ty);
if (TyDIE)
return TyDIE;
// Create new type.
- TyDIE = new DIE(dwarf::DW_TAG_base_type);
- insertDIE(Ty, TyDIE);
+ TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+
if (Ty.isBasicType())
constructTypeDIE(*TyDIE, DIBasicType(Ty));
else if (Ty.isCompositeType())
@@ -762,28 +909,24 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
DICompositeType CT(Ty);
// A runtime language of 0 actually means C/C++ and that any
// non-negative value is some version of Objective-C/C++.
- IsImplementation = (CT.getRunTimeLang() == 0) ||
- CT.isObjcClassComplete();
+ IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete();
}
- unsigned Flags = IsImplementation ?
- DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
+ unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
}
- addToContextOwner(TyDIE, Ty.getContext());
return TyDIE;
}
/// addType - Add a new type attribute to the specified entity.
-void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
- if (!Ty.isType())
- return;
+void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) {
+ assert(Ty && "Trying to add a type that doesn't exist?");
// Check for pre-existence.
DIEEntry *Entry = getDIEEntry(Ty);
// If it exists then use the existing value.
if (Entry) {
- Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
+ addDIEEntry(Entity, Attribute, Entry);
return;
}
@@ -793,28 +936,105 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
// Set up proxy.
Entry = createDIEEntry(Buffer);
insertDIEEntry(Ty, Entry);
- Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
+ addDIEEntry(Entity, Attribute, Entry);
// If this is a complete composite type then include it in the
// list of global types.
addGlobalType(Ty);
}
+// Accelerator table mutators - add each name along with its companion
+// DIE to the proper table while ensuring that the name that we're going
+// to reference is in the string table. We do this since the names we
+// add may not be identical to the names in the DIE.
+void CompileUnit::addAccelName(StringRef Name, DIE *Die) {
+ DU->getStringPoolEntry(Name);
+ std::vector<DIE *> &DIEs = AccelNames[Name];
+ DIEs.push_back(Die);
+}
+
+void CompileUnit::addAccelObjC(StringRef Name, DIE *Die) {
+ DU->getStringPoolEntry(Name);
+ std::vector<DIE *> &DIEs = AccelObjC[Name];
+ DIEs.push_back(Die);
+}
+
+void CompileUnit::addAccelNamespace(StringRef Name, DIE *Die) {
+ DU->getStringPoolEntry(Name);
+ std::vector<DIE *> &DIEs = AccelNamespace[Name];
+ DIEs.push_back(Die);
+}
+
+void CompileUnit::addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
+ DU->getStringPoolEntry(Name);
+ std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name];
+ DIEs.push_back(Die);
+}
+
+/// addGlobalName - Add a new global name to the compile unit.
+void CompileUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) {
+ std::string FullName = getParentContextString(Context) + Name.str();
+ GlobalNames[FullName] = Die;
+}
+
/// addGlobalType - Add a new global type to the compile unit.
///
void CompileUnit::addGlobalType(DIType Ty) {
- DIDescriptor Context = Ty.getContext();
- if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
- && (!Context || Context.isCompileUnit() || Context.isFile()
- || Context.isNameSpace()))
- if (DIEEntry *Entry = getDIEEntry(Ty))
- GlobalTypes[Ty.getName()] = Entry->getEntry();
+ DIScope Context = resolve(Ty.getContext());
+ if (!Ty.getName().empty() && !Ty.isForwardDecl() &&
+ (!Context || Context.isCompileUnit() || Context.isFile() ||
+ Context.isNameSpace()))
+ if (DIEEntry *Entry = getDIEEntry(Ty)) {
+ std::string FullName =
+ getParentContextString(Context) + Ty.getName().str();
+ GlobalTypes[FullName] = Entry->getEntry();
+ }
+}
+
+/// getParentContextString - Walks the metadata parent chain in a language
+/// specific manner (using the compile unit language) and returns
+/// it as a string. This is done at the metadata level because DIEs may
+/// not currently have been added to the parent context and walking the
+/// DIEs looking for names is more expensive than walking the metadata.
+std::string CompileUnit::getParentContextString(DIScope Context) const {
+ if (!Context)
+ return "";
+
+ // FIXME: Decide whether to implement this for non-C++ languages.
+ if (getLanguage() != dwarf::DW_LANG_C_plus_plus)
+ return "";
+
+ std::string CS;
+ SmallVector<DIScope, 1> Parents;
+ while (!Context.isCompileUnit()) {
+ Parents.push_back(Context);
+ if (Context.getContext())
+ Context = resolve(Context.getContext());
+ else
+ // Structure, etc. types will have a NULL context if they're at the top
+ // level.
+ break;
+ }
+
+ // Reverse iterate over our list to go from the outermost construct to the
+ // innermost.
+ for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(),
+ E = Parents.rend();
+ I != E; ++I) {
+ DIScope Ctx = *I;
+ StringRef Name = Ctx.getName();
+ if (!Name.empty()) {
+ CS += Name;
+ CS += "::";
+ }
+ }
+ return CS;
}
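getParentContextString deliberately walks the metadata scope chain rather than DIEs, collecting scopes outward and then printing them outermost-first with "::" between named scopes. A rough stand-alone sketch over a hypothetical parent-linked Scope struct (not DIScope):

#include <string>
#include <vector>

struct Scope {
  std::string Name;     // empty for anonymous scopes
  const Scope *Parent;  // nullptr at the top level / compile unit
};

std::string parentContextString(const Scope *Ctx) {
  std::vector<const Scope *> Parents;
  for (; Ctx; Ctx = Ctx->Parent)   // walk from the innermost scope outward
    Parents.push_back(Ctx);
  std::string CS;
  // Reverse iterate so the outermost scope is printed first.
  for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) {
    if (!(*I)->Name.empty()) {
      CS += (*I)->Name;
      CS += "::";
    }
  }
  return CS;
}

For `namespace A { struct B { ... }; }` the context of B yields "A::", so the pubnames/pubtypes keys built above come out as "A::B" for the type and "A::B::<name>" for entities declared inside it.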
-/// addPubTypes - Add type for pubtypes section.
+/// addPubTypes - Add subprogram argument types for pubtypes section.
void CompileUnit::addPubTypes(DISubprogram SP) {
DICompositeType SPTy = SP.getType();
- unsigned SPTag = SPTy.getTag();
+ uint16_t SPTag = SPTy.getTag();
if (SPTag != dwarf::DW_TAG_subroutine_type)
return;
@@ -835,18 +1055,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, Name);
- if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
- Buffer.setTag(dwarf::DW_TAG_unspecified_type);
- // An unspecified type only has a name attribute.
+ // An unspecified type only has a name attribute.
+ if (BTy.getTag() == dwarf::DW_TAG_unspecified_type)
return;
- }
- Buffer.setTag(dwarf::DW_TAG_base_type);
addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
BTy.getEncoding());
uint64_t Size = BTy.getSizeInBits() >> 3;
- addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
}
/// constructTypeDIE - Construct derived type die from DIDerivedType.
@@ -854,16 +1071,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Get core information.
StringRef Name = DTy.getName();
uint64_t Size = DTy.getSizeInBits() >> 3;
- unsigned Tag = DTy.getTag();
-
- // FIXME - Workaround for templates.
- if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
-
- Buffer.setTag(Tag);
+ uint16_t Tag = Buffer.getTag();
// Map to main type, void will not have a type.
- DIType FromTy = DTy.getTypeDerivedFrom();
- addType(&Buffer, FromTy);
+ DIType FromTy = resolve(DTy.getTypeDerivedFrom());
+ if (FromTy)
+ addType(&Buffer, FromTy);
// Add name if not anonymous or intermediate type.
if (!Name.empty())
@@ -871,11 +1084,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Add size if non-zero (derived types might be zero-sized.)
if (Size && Tag != dwarf::DW_TAG_pointer_type)
- addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
- addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
- getOrCreateTypeDIE(DTy.getClassType()));
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type,
+ getOrCreateTypeDIE(resolve(DTy.getClassType())));
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy.isForwardDecl())
addSourceLine(&Buffer, DTy);
@@ -883,20 +1096,20 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
/// Return true if the type is appropriately scoped to be contained inside
/// its own type unit.
-static bool isTypeUnitScoped(DIType Ty) {
- DIScope Parent = Ty.getContext();
+static bool isTypeUnitScoped(DIType Ty, const DwarfDebug *DD) {
+ DIScope Parent = DD->resolve(Ty.getContext());
while (Parent) {
// Don't generate a hash for anything scoped inside a function.
if (Parent.isSubprogram())
return false;
- Parent = Parent.getContext();
+ Parent = DD->resolve(Parent.getContext());
}
return true;
}
/// Return true if the type should be split out into a type unit.
-static bool shouldCreateTypeUnit(DICompositeType CTy) {
- unsigned Tag = CTy.getTag();
+static bool shouldCreateTypeUnit(DICompositeType CTy, const DwarfDebug *DD) {
+ uint16_t Tag = CTy.getTag();
switch (Tag) {
case dwarf::DW_TAG_structure_type:
@@ -904,13 +1117,11 @@ static bool shouldCreateTypeUnit(DICompositeType CTy) {
case dwarf::DW_TAG_enumeration_type:
case dwarf::DW_TAG_class_type:
// If this is a class, structure, union, or enumeration type
- // that is not a declaration, is a type definition, and not scoped
+ // that is a definition (not a declaration), and not scoped
// inside a function then separate this out as a type unit.
- if (CTy.isForwardDecl() || !isTypeUnitScoped(CTy))
- return 0;
- return 1;
+ return !CTy.isForwardDecl() && isTypeUnitScoped(CTy, DD);
default:
- return 0;
+ return false;
}
}
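Taken together, isTypeUnitScoped and shouldCreateTypeUnit restrict type units to class-like definitions that are not lexically nested inside any function. A rough restatement over a toy scope chain (both types here are hypothetical, not LLVM's):

struct ScopeNode {
  bool IsSubprogram;       // true for function scopes
  const ScopeNode *Parent; // nullptr at the top level
};

enum class Tag { Structure, Union, Enumeration, Class, Other };

bool shouldCreateTypeUnit(Tag T, bool IsForwardDecl, const ScopeNode *Scope) {
  switch (T) {
  case Tag::Structure:
  case Tag::Union:
  case Tag::Enumeration:
  case Tag::Class:
    break;                          // only class-like tags qualify
  default:
    return false;
  }
  if (IsForwardDecl)                // declarations are never split out
    return false;
  for (; Scope; Scope = Scope->Parent)
    if (Scope->IsSubprogram)        // function-local types stay in the CU
      return false;
  return true;
}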
@@ -920,69 +1131,47 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
StringRef Name = CTy.getName();
uint64_t Size = CTy.getSizeInBits() >> 3;
- unsigned Tag = CTy.getTag();
- Buffer.setTag(Tag);
+ uint16_t Tag = Buffer.getTag();
switch (Tag) {
case dwarf::DW_TAG_array_type:
- constructArrayTypeDIE(Buffer, &CTy);
+ constructArrayTypeDIE(Buffer, CTy);
break;
- case dwarf::DW_TAG_enumeration_type: {
- DIArray Elements = CTy.getTypeArray();
-
- // Add enumerators to enumeration type.
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIE *ElemDie = NULL;
- DIDescriptor Enum(Elements.getElement(i));
- if (Enum.isEnumerator()) {
- ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
- Buffer.addChild(ElemDie);
- }
- }
- DIType DTy = CTy.getTypeDerivedFrom();
- if (DTy.isType()) {
- addType(&Buffer, DTy);
- addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1);
- }
- }
+ case dwarf::DW_TAG_enumeration_type:
+ constructEnumTypeDIE(Buffer, CTy);
break;
case dwarf::DW_TAG_subroutine_type: {
- // Add return type.
+ // Add return type. A void return won't have a type.
DIArray Elements = CTy.getTypeArray();
- DIDescriptor RTy = Elements.getElement(0);
- addType(&Buffer, DIType(RTy));
+ DIType RTy(Elements.getElement(0));
+ if (RTy)
+ addType(&Buffer, RTy);
bool isPrototyped = true;
// Add arguments.
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Ty = Elements.getElement(i);
if (Ty.isUnspecifiedParameter()) {
- DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
- Buffer.addChild(Arg);
+ createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
isPrototyped = false;
} else {
- DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
addType(Arg, DIType(Ty));
if (DIType(Ty).isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
- Buffer.addChild(Arg);
}
}
// Add prototype flag if we're dealing with a C language and the
// function has been prototyped.
+ uint16_t Language = getLanguage();
if (isPrototyped &&
- (Language == dwarf::DW_LANG_C89 ||
- Language == dwarf::DW_LANG_C99 ||
+ (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
addFlag(&Buffer, dwarf::DW_AT_prototyped);
- }
- break;
+ } break;
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type: {
- if (CTy.isForwardDecl())
- break;
-
// Add elements to structure type.
DIArray Elements = CTy.getTypeArray();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
@@ -990,7 +1179,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIE *ElemDie = NULL;
if (Element.isSubprogram()) {
DISubprogram SP(Element);
- ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
+ ElemDie = getOrCreateSubprogramDIE(SP);
if (SP.isProtected())
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
@@ -999,21 +1188,23 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
dwarf::DW_ACCESS_private);
else
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
- dwarf::DW_ACCESS_public);
+ dwarf::DW_ACCESS_public);
if (SP.isExplicit())
addFlag(ElemDie, dwarf::DW_AT_explicit);
} else if (Element.isDerivedType()) {
DIDerivedType DDTy(Element);
if (DDTy.getTag() == dwarf::DW_TAG_friend) {
- ElemDie = new DIE(dwarf::DW_TAG_friend);
- addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
- } else if (DDTy.isStaticMember())
- ElemDie = createStaticMemberDIE(DDTy);
- else
- ElemDie = createMemberDIE(DDTy);
+ ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
+ addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()),
+ dwarf::DW_AT_friend);
+ } else if (DDTy.isStaticMember()) {
+ getOrCreateStaticMemberDIE(DDTy);
+ } else {
+ constructMemberDIE(Buffer, DDTy);
+ }
} else if (Element.isObjCProperty()) {
DIObjCProperty Property(Element);
- ElemDie = new DIE(Property.getTag());
+ ElemDie = createAndAddDIE(Property.getTag(), Buffer);
StringRef PropertyName = Property.getObjCPropertyName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
addType(ElemDie, Property.getType());
@@ -1038,8 +1229,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (Property.isNonAtomicObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
if (PropertyAttributes)
- addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
- PropertyAttributes);
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
+ PropertyAttributes);
DIEEntry *Entry = getDIEEntry(Element);
if (!Entry) {
@@ -1048,18 +1239,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
} else
continue;
- Buffer.addChild(ElemDie);
}
if (CTy.isAppleBlockExtension())
addFlag(&Buffer, dwarf::DW_AT_APPLE_block);
- DICompositeType ContainingType = CTy.getContainingType();
- if (DIDescriptor(ContainingType).isCompositeType())
- addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
- getOrCreateTypeDIE(DIType(ContainingType)));
- else
- addToContextOwner(&Buffer, CTy.getContext());
+ DICompositeType ContainingType(resolve(CTy.getContainingType()));
+ if (ContainingType)
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type,
+ getOrCreateTypeDIE(ContainingType));
if (CTy.isObjcClassComplete())
addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
@@ -1067,8 +1255,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add template parameters to a class, structure or union types.
// FIXME: The support isn't in the metadata for this yet.
if (Tag == dwarf::DW_TAG_class_type ||
- Tag == dwarf::DW_TAG_structure_type ||
- Tag == dwarf::DW_TAG_union_type)
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
addTemplateParams(Buffer, CTy.getTemplateParams());
break;
@@ -1082,16 +1269,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addString(&Buffer, dwarf::DW_AT_name, Name);
if (Tag == dwarf::DW_TAG_enumeration_type ||
- Tag == dwarf::DW_TAG_class_type ||
- Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized.)
// TODO: Do we care about size for enum forward declarations?
if (Size)
- addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
else if (!CTy.isForwardDecl())
// Add zero size if it is not a forward declaration.
- addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0);
// If we're a forward decl, say so.
if (CTy.isForwardDecl())
@@ -1104,131 +1290,126 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// No harm in adding the runtime language to the declaration.
unsigned RLang = CTy.getRunTimeLang();
if (RLang)
- addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
- dwarf::DW_FORM_data1, RLang);
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
+ RLang);
}
// If this is a type applicable to a type unit it then add it to the
// list of types we'll compute a hash for later.
- if (shouldCreateTypeUnit(CTy))
+ if (shouldCreateTypeUnit(CTy, DD))
DD->addTypeUnitType(&Buffer);
}
-/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
-/// for the given DITemplateTypeParameter.
-DIE *
-CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
- DIE *ParamDIE = getDIE(TP);
- if (ParamDIE)
- return ParamDIE;
-
- ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
- addType(ParamDIE, TP.getType());
+/// constructTemplateTypeParameterDIE - Construct new DIE for the given
+/// DITemplateTypeParameter.
+void
+CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
+ DITemplateTypeParameter TP) {
+ DIE *ParamDIE =
+ createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
+ // Add the type if it exists; it could be void and therefore have no type.
+ if (TP.getType())
+ addType(ParamDIE, resolve(TP.getType()));
if (!TP.getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
- return ParamDIE;
-}
-
-/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
-/// for the given DITemplateValueParameter.
-DIE *
-CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
- DIE *ParamDIE = getDIE(TPV);
- if (ParamDIE)
- return ParamDIE;
-
- ParamDIE = new DIE(TPV.getTag());
- addType(ParamDIE, TPV.getType());
- if (!TPV.getName().empty())
- addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
- if (Value *Val = TPV.getValue()) {
+}
+
+/// constructTemplateValueParameterDIE - Construct new DIE for the given
+/// DITemplateValueParameter.
+void
+CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer,
+ DITemplateValueParameter VP) {
+ DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
+
+ // Add the type if there is one; template template parameters and template
+ // parameter packs will not have a type.
+ if (VP.getTag() == dwarf::DW_TAG_template_value_parameter)
+ addType(ParamDIE, resolve(VP.getType()));
+ if (!VP.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, VP.getName());
+ if (Value *Val = VP.getValue()) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val))
- addConstantValue(ParamDIE, CI, TPV.getType().isUnsignedDIType());
+ addConstantValue(ParamDIE, CI,
+ isUnsignedDIType(DD, resolve(VP.getType())));
else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) {
// For declaration non-type template parameters (such as global values and
// functions)
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addOpAddress(Block, Asm->Mang->getSymbol(GV));
+ addOpAddress(Block, Asm->getSymbol(GV));
// Emit DW_OP_stack_value to use the address as the immediate value of the
// parameter, rather than a pointer to it.
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
- addBlock(ParamDIE, dwarf::DW_AT_location, 0, Block);
- } else if (TPV.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+ addBlock(ParamDIE, dwarf::DW_AT_location, Block);
+ } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
assert(isa<MDString>(Val));
addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
cast<MDString>(Val)->getString());
- } else if (TPV.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
assert(isa<MDNode>(Val));
DIArray A(cast<MDNode>(Val));
addTemplateParams(*ParamDIE, A);
}
}
-
- return ParamDIE;
}
/// getOrCreateNameSpace - Create a DIE for DINameSpace.
DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(NS.getContext());
+
DIE *NDie = getDIE(NS);
if (NDie)
return NDie;
- NDie = new DIE(dwarf::DW_TAG_namespace);
- insertDIE(NS, NDie);
+ NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
+
if (!NS.getName().empty()) {
addString(NDie, dwarf::DW_AT_name, NS.getName());
addAccelNamespace(NS.getName(), NDie);
+ addGlobalName(NS.getName(), NDie, NS.getContext());
} else
addAccelNamespace("(anonymous namespace)", NDie);
addSourceLine(NDie, NS);
- addToContextOwner(NDie, NS.getContext());
return NDie;
}
/// getOrCreateSubprogramDIE - Create new DIE using SP.
DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE (as is the case for member function
+ // declarations).
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+
DIE *SPDie = getDIE(SP);
if (SPDie)
return SPDie;
- SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ if (SPDecl.isSubprogram())
+ // Add subprogram definitions to the CU die directly.
+ ContextDIE = CUDie.get();
// DW_TAG_inlined_subroutine may refer to this DIE.
- insertDIE(SP, SPDie);
+ SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
- DISubprogram SPDecl = SP.getFunctionDeclaration();
DIE *DeclDie = NULL;
- if (SPDecl.isSubprogram()) {
+ if (SPDecl.isSubprogram())
DeclDie = getOrCreateSubprogramDIE(SPDecl);
- }
-
- // Add to context owner.
- addToContextOwner(SPDie, SP.getContext());
// Add function template parameters.
addTemplateParams(*SPDie, SP.getTemplateParams());
- // Unfortunately this code needs to stay here instead of below the
- // AT_specification code in order to work around a bug in older
- // gdbs that requires the linkage name to resolve multiple template
- // functions.
- // TODO: Remove this set of code when we get rid of the old gdb
- // compatibility.
- StringRef LinkageName = SP.getLinkageName();
- if (!LinkageName.empty() && DD->useDarwinGDBCompat())
- addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
-
// If this DIE is going to refer declaration info using AT_specification
// then there is no need to add other attributes.
if (DeclDie) {
// Refer function declaration directly.
- addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- DeclDie);
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie);
return SPDie;
}
// Add the linkage name if we have one.
- if (!LinkageName.empty() && !DD->useDarwinGDBCompat())
+ StringRef LinkageName = SP.getLinkageName();
+ if (!LinkageName.empty())
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
GlobalValue::getRealLinkageName(LinkageName));
@@ -1240,29 +1421,31 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Add the prototype if we have a prototype and we have a C like
// language.
+ uint16_t Language = getLanguage();
if (SP.isPrototyped() &&
- (Language == dwarf::DW_LANG_C89 ||
- Language == dwarf::DW_LANG_C99 ||
+ (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
- // Add Return Type.
DICompositeType SPTy = SP.getType();
assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type &&
"the type of a subprogram should be a subroutine");
DIArray Args = SPTy.getTypeArray();
- addType(SPDie, DIType(Args.getElement(0)));
+ // Add a return type. If this is a type like a C/C++ void type, we don't add a
+ // return type.
+ if (Args.getElement(0))
+ addType(SPDie, DIType(Args.getElement(0)));
unsigned VK = SP.getVirtuality();
if (VK) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIEBlock *Block = getDIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
- addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
- ContainingTypeMap.insert(std::make_pair(SPDie,
- SP.getContainingType()));
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
+ ContainingTypeMap.insert(
+ std::make_pair(SPDie, resolve(SP.getContainingType())));
}
if (!SP.isDefinition()) {
@@ -1270,13 +1453,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled while processing variables.
- for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(Args.getElement(i));
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie);
+ DIType ATy(Args.getElement(i));
addType(Arg, ATy);
if (ATy.isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
- SPDie->addChild(Arg);
}
}
@@ -1324,16 +1506,16 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
}
/// createGlobalVariableDIE - create global variable DIE.
-void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
+void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
+
// Check for pre-existence.
- if (getDIE(N))
+ if (getDIE(GV))
return;
- DIGlobalVariable GV(N);
if (!GV.isGlobalVariable())
return;
- DIDescriptor GVContext = GV.getContext();
+ DIScope GVContext = GV.getContext();
DIType GTy = GV.getType();
// If this is a static data member definition, some attributes belong
@@ -1344,35 +1526,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
if (SDMDecl.Verify()) {
assert(SDMDecl.isStaticMember() && "Expected static member decl");
// We need the declaration DIE that is in the static member's class.
- // But that class might not exist in the DWARF yet.
- // Creating the class will create the static member decl DIE.
- getOrCreateContextDIE(SDMDecl.getContext());
- VariableDIE = getDIE(SDMDecl);
- assert(VariableDIE && "Static member decl has no context?");
+ VariableDIE = getOrCreateStaticMemberDIE(SDMDecl);
IsStaticMember = true;
}
// If this is not a static data member definition, create the variable
// DIE and add the initial set of attributes to it.
if (!VariableDIE) {
- VariableDIE = new DIE(GV.getTag());
+ // Construct the context before querying for the existence of the DIE in
+ // case such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(GVContext);
+
// Add to map.
- insertDIE(N, VariableDIE);
+ VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV);
// Add name and type.
addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
addType(VariableDIE, GTy);
// Add scoping info.
- if (!GV.isLocalToUnit()) {
+ if (!GV.isLocalToUnit())
addFlag(VariableDIE, dwarf::DW_AT_external);
- addGlobalName(GV.getName(), VariableDIE);
- }
// Add line number info.
addSourceLine(VariableDIE, GV);
- // Add to context owner.
- addToContextOwner(VariableDIE, GVContext);
}
// Add location.
@@ -1382,7 +1559,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
if (isGlobalVariable) {
addToAccelTable = true;
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- const MCSymbol *Sym = Asm->Mang->getSymbol(GV.getGlobal());
+ const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
if (GV.getGlobal()->isThreadLocal()) {
// FIXME: Make this work with -gsplit-dwarf.
unsigned PointerSize = Asm->getDataLayout().getPointerSize();
@@ -1393,68 +1570,62 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
// Based on GCC's support for TLS:
if (!DD->useSplitDwarf()) {
// 1) Start with a constNu of the appropriate pointer size
- addUInt(Block, 0, dwarf::DW_FORM_data1,
+ addUInt(Block, dwarf::DW_FORM_data1,
PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) address of the TLS variable
- addExpr(Block, 0, dwarf::DW_FORM_udata, Expr);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(Block, dwarf::DW_FORM_udata, Expr);
} else {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(Block, 0, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr));
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr));
}
- // 3) followed by a custom OP to tell the debugger about TLS (presumably)
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_lo_user);
+ // 3) followed by a custom OP to make the debugger do a TLS lookup.
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
} else
addOpAddress(Block, Sym);
// Do not create specification DIE if context is either compile unit
// or a subprogram.
if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
- !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) {
// Create specification DIE.
- VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
- addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
- dwarf::DW_FORM_ref4, VariableDIE);
- addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *CUDie);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE);
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block);
// A static member's declaration is already flagged as such.
if (!SDMDecl.Verify())
addFlag(VariableDIE, dwarf::DW_AT_declaration);
- addDie(VariableSpecDIE);
} else {
- addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ addBlock(VariableDIE, dwarf::DW_AT_location, Block);
}
- // Add linkage name.
+ // Add the linkage name.
StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty()) {
+ if (!LinkageName.empty())
// From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
// TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
// TAG_variable.
- addString(IsStaticMember && VariableSpecDIE ?
- VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE
+ : VariableDIE,
+ dwarf::DW_AT_MIPS_linkage_name,
GlobalValue::getRealLinkageName(LinkageName));
- // In compatibility mode with older gdbs we put the linkage name on both
- // the TAG_variable DIE and on the TAG_member DIE.
- if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat())
- addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
- }
} else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
// AT_const_value was added when the static member was created. To avoid
// emitting AT_const_value multiple times, we only add AT_const_value when
// it is not a static member.
if (!IsStaticMember)
- addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
- } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy));
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) {
addToAccelTable = true;
// GV is a merged global.
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
Value *Ptr = CE->getOperand(0);
- addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
- addUInt(Block, 0, dwarf::DW_FORM_udata,
- Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ addOpAddress(Block, Asm->getSymbol(cast<GlobalValue>(Ptr)));
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
+ addUInt(Block, dwarf::DW_FORM_udata,
+ Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(VariableDIE, dwarf::DW_AT_location, Block);
}
if (addToAccelTable) {
@@ -1466,13 +1637,17 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
addAccelName(GV.getLinkageName(), AddrDIE);
}
+
+ if (!GV.isLocalToUnit())
+ addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE,
+ GV.getContext());
}
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
DIE *IndexTy) {
- DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
- addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy);
// The LowerBound value defines the lower bounds which is typically zero for
// C/C++. The Count value is the number of elements. Values are 64 bit. If
@@ -1485,26 +1660,22 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
int64_t Count = SR.getCount();
if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
- addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound);
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
if (Count != -1 && Count != 0)
// FIXME: An unbounded array should reference the expression that defines
// the array.
- addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1);
-
- Buffer.addChild(DW_Subrange);
+ addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None,
+ LowerBound + Count - 1);
}
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
- DICompositeType *CTy) {
- Buffer.setTag(dwarf::DW_TAG_array_type);
- if (CTy->isVector())
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
+ if (CTy.isVector())
addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
- // Emit derived type.
- addType(&Buffer, CTy->getTypeDerivedFrom());
- DIArray Elements = CTy->getTypeArray();
+ // Emit the element type.
+ addType(&Buffer, resolve(CTy.getTypeDerivedFrom()));
// Get an anonymous type for index type.
// FIXME: This type should be passed down from the front end
@@ -1512,16 +1683,16 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DIE *IdxTy = getIndexTyDie();
if (!IdxTy) {
// Construct an anonymous type for index type.
- IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *CUDie.get());
addString(IdxTy, dwarf::DW_AT_name, "int");
- addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t));
addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_signed);
- addDie(IdxTy);
setIndexTyDie(IdxTy);
}
// Add subranges to array type.
+ DIArray Elements = CTy.getTypeArray();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
if (Element.getTag() == dwarf::DW_TAG_subrange_type)
@@ -1529,168 +1700,180 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
}
}
-/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
- DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
- StringRef Name = ETy.getName();
- addString(Enumerator, dwarf::DW_AT_name, Name);
- int64_t Value = ETy.getEnumValue();
- addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
- return Enumerator;
+/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType.
+void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
+ DIArray Elements = CTy.getTypeArray();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIEnumerator Enum(Elements.getElement(i));
+ if (Enum.isEnumerator()) {
+ DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
+ StringRef Name = Enum.getName();
+ addString(Enumerator, dwarf::DW_AT_name, Name);
+ int64_t Value = Enum.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ }
+ }
+ DIType DTy = resolve(CTy.getTypeDerivedFrom());
+ if (DTy) {
+ addType(&Buffer, DTy);
+ addFlag(&Buffer, dwarf::DW_AT_enum_class);
+ }
}
/// constructContainingTypeDIEs - Construct DIEs for types that contain
/// vtables.
void CompileUnit::constructContainingTypeDIEs() {
for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
- CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ CE = ContainingTypeMap.end();
+ CI != CE; ++CI) {
DIE *SPDie = CI->first;
- const MDNode *N = CI->second;
- if (!N) continue;
- DIE *NDie = getDIE(N);
- if (!NDie) continue;
- addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ DIDescriptor D(CI->second);
+ if (!D)
+ continue;
+ DIE *NDie = getDIE(D);
+ if (!NDie)
+ continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie);
}
}
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
-DIE *CompileUnit::constructVariableDIE(DbgVariable *DV,
- bool isScopeAbstract) {
- StringRef Name = DV->getName();
-
- // Translate tag to proper Dwarf tag.
- unsigned Tag = DV->getTag();
+DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) {
+ StringRef Name = DV.getName();
// Define variable debug information entry.
- DIE *VariableDie = new DIE(Tag);
- DbgVariable *AbsVar = DV->getAbstractVariable();
+ DIE *VariableDie = new DIE(DV.getTag());
+ DbgVariable *AbsVar = DV.getAbstractVariable();
DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL;
if (AbsDIE)
- addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, AbsDIE);
+ addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE);
else {
- addString(VariableDie, dwarf::DW_AT_name, Name);
- addSourceLine(VariableDie, DV->getVariable());
- addType(VariableDie, DV->getType());
+ if (!Name.empty())
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(VariableDie, DV.getVariable());
+ addType(VariableDie, DV.getType());
}
- if (DV->isArtificial())
+ if (DV.isArtificial())
addFlag(VariableDie, dwarf::DW_AT_artificial);
if (isScopeAbstract) {
- DV->setDIE(VariableDie);
+ DV.setDIE(VariableDie);
return VariableDie;
}
// Add variable address.
- unsigned Offset = DV->getDotDebugLocOffset();
+ unsigned Offset = DV.getDotDebugLocOffset();
if (Offset != ~0U) {
- addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
+ addLabel(VariableDie, dwarf::DW_AT_location,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
Asm->GetTempSymbol("debug_loc", Offset));
- DV->setDIE(VariableDie);
+ DV.setDIE(VariableDie);
return VariableDie;
}
// Check if variable is described by a DBG_VALUE instruction.
- if (const MachineInstr *DVInsn = DV->getMInsn()) {
+ if (const MachineInstr *DVInsn = DV.getMInsn()) {
assert(DVInsn->getNumOperands() == 3);
if (DVInsn->getOperand(0).isReg()) {
const MachineOperand RegOp = DVInsn->getOperand(0);
// If the second operand is an immediate, this is an indirect value.
if (DVInsn->getOperand(1).isImm()) {
- MachineLocation Location(RegOp.getReg(), DVInsn->getOperand(1).getImm());
- addVariableAddress(*DV, VariableDie, Location);
+ MachineLocation Location(RegOp.getReg(),
+ DVInsn->getOperand(1).getImm());
+ addVariableAddress(DV, VariableDie, Location);
} else if (RegOp.getReg())
- addVariableAddress(*DV, VariableDie, MachineLocation(RegOp.getReg()));
+ addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg()));
} else if (DVInsn->getOperand(0).isImm())
- addConstantValue(VariableDie, DVInsn->getOperand(0), DV->getType());
+ addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType());
else if (DVInsn->getOperand(0).isFPImm())
addConstantFPValue(VariableDie, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isCImm())
addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(),
- DV->getType().isUnsignedDIType());
+ isUnsignedDIType(DD, DV.getType()));
- DV->setDIE(VariableDie);
+ DV.setDIE(VariableDie);
return VariableDie;
} else {
// .. else use frame index.
- int FI = DV->getFrameIndex();
+ int FI = DV.getFrameIndex();
if (FI != ~0) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
- TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
MachineLocation Location(FrameReg, Offset);
- addVariableAddress(*DV, VariableDie, Location);
+ addVariableAddress(DV, VariableDie, Location);
}
}
- DV->setDIE(VariableDie);
+ DV.setDIE(VariableDie);
return VariableDie;
}
-/// createMemberDIE - Create new member DIE.
-DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
- DIE *MemberDie = new DIE(DT.getTag());
+/// constructMemberDIE - Construct member DIE from DIDerivedType.
+void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
+ DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer);
StringRef Name = DT.getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
- addType(MemberDie, DT.getTypeDerivedFrom());
+ addType(MemberDie, resolve(DT.getTypeDerivedFrom()));
addSourceLine(MemberDie, DT);
DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
- addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-
- uint64_t Size = DT.getSizeInBits();
- uint64_t FieldSize = DT.getOriginalTypeSize();
-
- if (Size != FieldSize) {
- // Handle bitfield.
- addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
- addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
-
- uint64_t Offset = DT.getOffsetInBits();
- uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
- uint64_t HiMark = (Offset + FieldSize) & AlignMask;
- uint64_t FieldOffset = (HiMark - FieldSize);
- Offset -= FieldOffset;
-
- // Maybe we need to work from the other end.
- if (Asm->getDataLayout().isLittleEndian())
- Offset = FieldSize - (Offset + Size);
- addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
-
- // Here WD_AT_data_member_location points to the anonymous
- // field that includes this bit field.
- addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
-
- } else
- // This is not a bitfield.
- addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+ addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- if (DT.getTag() == dwarf::DW_TAG_inheritance
- && DT.isVirtual()) {
+ if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) {
// For C++, virtual base classes are not at fixed offset. Use following
// expression to extract appropriate offset from vtable.
// BaseAddr = ObAddr + *((*ObAddr) - Offset)
DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
- addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
- addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
- addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
- addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
-
- addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
- VBaseLocationDie);
- } else
- addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+ addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
+ } else {
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = getBaseTypeSize(DD, DT);
+ uint64_t OffsetInBytes;
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, None,
+ getBaseTypeSize(DD, DT) >> 3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getDataLayout().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
+
+ // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ OffsetInBytes = FieldOffset >> 3;
+ } else
+ // This is not a bitfield.
+ OffsetInBytes = DT.getOffsetInBits() >> 3;
+ addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, OffsetInBytes);
+ }
if (DT.isProtected())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
@@ -1714,17 +1897,26 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
if (DT.isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
-
- return MemberDie;
}
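The bitfield branch of constructMemberDIE above keeps the old createMemberDIE arithmetic: it locates the aligned storage unit the field lives in, emits that unit's byte offset as DW_AT_data_member_location, and converts the field's position into the pre-DWARF-5 DW_AT_bit_offset convention (counted from the unit's most significant bit, hence the flip on little-endian targets). A self-contained sketch of just that arithmetic, with plain-integer inputs and a hypothetical BitfieldLoc result type:

#include <cstdint>

struct BitfieldLoc {
  uint64_t ByteOffset; // DW_AT_data_member_location, in bytes
  uint64_t BitOffset;  // DW_AT_bit_offset, from the unit's MSB
};

BitfieldLoc locateBitfield(uint64_t OffsetInBits, uint64_t SizeInBits,
                           uint64_t FieldSizeInBits, uint64_t AlignInBits,
                           bool LittleEndian) {
  uint64_t AlignMask = ~(AlignInBits - 1);
  uint64_t HiMark = (OffsetInBits + FieldSizeInBits) & AlignMask;
  uint64_t FieldOffset = HiMark - FieldSizeInBits; // start of the storage unit
  uint64_t Offset = OffsetInBits - FieldOffset;    // bit offset within that unit
  if (LittleEndian)                                // count from the other end
    Offset = FieldSizeInBits - (Offset + SizeInBits);
  return {FieldOffset >> 3, Offset};
}

For example, a 3-bit field starting at bit 5 of a 32-bit, 32-bit-aligned storage unit on a little-endian target gives ByteOffset 0 and BitOffset 24.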
-/// createStaticMemberDIE - Create new DIE for C++ static member.
-DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) {
+/// getOrCreateStaticMemberDIE - Find or create a DIE for a C++ static member.
+DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
if (!DT.Verify())
return NULL;
- DIE *StaticMemberDIE = new DIE(DT.getTag());
- DIType Ty = DT.getTypeDerivedFrom();
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext()));
+ assert(dwarf::isType(ContextDIE->getTag()) &&
+ "Static member should belong to a type.");
+
+ DIE *StaticMemberDIE = getDIE(DT);
+ if (StaticMemberDIE)
+ return StaticMemberDIE;
+
+ StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT);
+
+ DIType Ty = resolve(DT.getTypeDerivedFrom());
addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
addType(StaticMemberDIE, Ty);
@@ -1745,10 +1937,20 @@ DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) {
dwarf::DW_ACCESS_public);
if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
- addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType());
+ addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty));
if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
addConstantFPValue(StaticMemberDIE, CFP);
- insertDIE(DT, StaticMemberDIE);
return StaticMemberDIE;
}
+
+void CompileUnit::emitHeader(const MCSection *ASection,
+ const MCSymbol *ASectionSym) {
+ Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->EmitInt16(DD->getDwarfVersion());
+ Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()),
+ ASectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+}
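emitHeader writes the tail of the compile unit header: a 2-byte DWARF version, a 4-byte offset into .debug_abbrev, and a 1-byte address size; the initial unit-length word is emitted separately. A rough sketch of the same layout into a raw buffer, assuming the 32-bit DWARF format and little-endian output (appendCompileUnitHeader is illustrative, not an LLVM API):

#include <cstdint>
#include <vector>

void appendCompileUnitHeader(std::vector<uint8_t> &Out, uint16_t DwarfVersion,
                             uint32_t AbbrevSectionOffset, uint8_t AddrSize) {
  Out.push_back(uint8_t(DwarfVersion));        // DWARF version number (2 bytes)
  Out.push_back(uint8_t(DwarfVersion >> 8));
  for (int i = 0; i != 4; ++i)                 // offset into the abbrev. section
    Out.push_back(uint8_t(AbbrevSectionOffset >> (8 * i)));
  Out.push_back(AddrSize);                     // address size, in bytes
}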
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 3908b37..d782c88 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,7 +15,9 @@
#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DIE.h"
+#include "DwarfDebug.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/DebugInfo.h"
@@ -23,8 +25,6 @@
namespace llvm {
-class DwarfDebug;
-class DwarfUnits;
class MachineLocation;
class MachineOperand;
class ConstantInt;
@@ -39,11 +39,10 @@ class CompileUnit {
///
unsigned UniqueID;
- /// Language - The DW_AT_language of the compile unit
- ///
- unsigned Language;
+ /// Node - MDNode for the compile unit.
+ DICompileUnit Node;
- /// Die - Compile unit debug information entry.
+ /// CUDie - Compile unit debug information entry.
///
const OwningPtr<DIE> CUDie;
@@ -67,18 +66,18 @@ class CompileUnit {
/// GlobalNames - A map of globally visible named entities for this unit.
///
- StringMap<DIE*> GlobalNames;
+ StringMap<DIE *> GlobalNames;
/// GlobalTypes - A map of globally visible types for this unit.
///
- StringMap<DIE*> GlobalTypes;
+ StringMap<DIE *> GlobalTypes;
/// AccelNames - A map of names for the name accelerator table.
///
- StringMap<std::vector<DIE*> > AccelNames;
- StringMap<std::vector<DIE*> > AccelObjC;
- StringMap<std::vector<DIE*> > AccelNamespace;
- StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes;
+ StringMap<std::vector<DIE *> > AccelNames;
+ StringMap<std::vector<DIE *> > AccelObjC;
+ StringMap<std::vector<DIE *> > AccelNamespace;
+ StringMap<std::vector<std::pair<DIE *, unsigned> > > AccelTypes;
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -88,165 +87,154 @@ class CompileUnit {
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
- /// Offset of the CUDie from beginning of debug info section.
- unsigned DebugInfoOffset;
+ // DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
- /// getLowerBoundDefault - Return the default lower bound for an array. If the
- /// DWARF version doesn't handle the language, return -1.
- int64_t getDefaultLowerBound() const;
+ // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently.
+ DIEInteger *DIEIntegerOne;
public:
- CompileUnit(unsigned UID, unsigned L, DIE *D, const MDNode *N, AsmPrinter *A,
+ CompileUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A,
DwarfDebug *DW, DwarfUnits *DWU);
~CompileUnit();
// Accessors.
- unsigned getUniqueID() const { return UniqueID; }
- unsigned getLanguage() const { return Language; }
- DIE* getCUDie() const { return CUDie.get(); }
- unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
- const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
- const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
-
- const StringMap<std::vector<DIE*> > &getAccelNames() const {
+ unsigned getUniqueID() const { return UniqueID; }
+ uint16_t getLanguage() const { return Node.getLanguage(); }
+ DICompileUnit getNode() const { return Node; }
+ DIE *getCUDie() const { return CUDie.get(); }
+ const StringMap<DIE *> &getGlobalNames() const { return GlobalNames; }
+ const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; }
+
+ const StringMap<std::vector<DIE *> > &getAccelNames() const {
return AccelNames;
}
- const StringMap<std::vector<DIE*> > &getAccelObjC() const {
+ const StringMap<std::vector<DIE *> > &getAccelObjC() const {
return AccelObjC;
}
- const StringMap<std::vector<DIE*> > &getAccelNamespace() const {
+ const StringMap<std::vector<DIE *> > &getAccelNamespace() const {
return AccelNamespace;
}
- const StringMap<std::vector<std::pair<DIE*, unsigned > > >
- &getAccelTypes() const {
+ const StringMap<std::vector<std::pair<DIE *, unsigned> > > &
+ getAccelTypes() const {
return AccelTypes;
}
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
+ /// getParentContextString - Get a string containing the language specific
+ /// context for a global name.
+ std::string getParentContextString(DIScope Context) const;
+
/// addGlobalName - Add a new global entity to the compile unit.
///
- void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; }
+ void addGlobalName(StringRef Name, DIE *Die, DIScope Context);
/// addGlobalType - Add a new global type to the compile unit.
///
void addGlobalType(DIType Ty);
+ /// addPubTypes - Add a set of types from the subprogram to the global types.
+ void addPubTypes(DISubprogram SP);
/// addAccelName - Add a new name to the name accelerator table.
- void addAccelName(StringRef Name, DIE *Die) {
- std::vector<DIE*> &DIEs = AccelNames[Name];
- DIEs.push_back(Die);
- }
- void addAccelObjC(StringRef Name, DIE *Die) {
- std::vector<DIE*> &DIEs = AccelObjC[Name];
- DIEs.push_back(Die);
- }
- void addAccelNamespace(StringRef Name, DIE *Die) {
- std::vector<DIE*> &DIEs = AccelNamespace[Name];
- DIEs.push_back(Die);
- }
- void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
- std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name];
- DIEs.push_back(Die);
- }
+ void addAccelName(StringRef Name, DIE *Die);
- /// getDIE - Returns the debug information entry map slot for the
- /// specified debug variable.
- DIE *getDIE(const MDNode *N) const { return MDNodeToDieMap.lookup(N); }
+ /// addAccelObjC - Add a new name to the ObjC accelerator table.
+ void addAccelObjC(StringRef Name, DIE *Die);
- DIEBlock *getDIEBlock() {
- return new (DIEValueAllocator) DIEBlock();
- }
+ /// addAccelNamespace - Add a new name to the namespace accelerator table.
+ void addAccelNamespace(StringRef Name, DIE *Die);
- /// insertDIE - Insert DIE into the map.
- void insertDIE(const MDNode *N, DIE *D) {
- MDNodeToDieMap.insert(std::make_pair(N, D));
- }
+ /// addAccelType - Add a new type to the type accelerator table.
+ void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die);
- /// getDIEEntry - Returns the debug information entry for the specified
- /// debug variable.
- DIEEntry *getDIEEntry(const MDNode *N) const {
- return MDNodeToDIEEntryMap.lookup(N);
- }
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable. We delegate the request to DwarfDebug
+ /// when the MDNode can be part of the type system, since DIEs for
+ /// the type system can be shared across CUs and the mappings are
+ /// kept in DwarfDebug.
+ DIE *getDIE(DIDescriptor D) const;
- /// insertDIEEntry - Insert debug information entry into the map.
- void insertDIEEntry(const MDNode *N, DIEEntry *E) {
- MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
- }
+ DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); }
+
+ /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug
+ /// when the MDNode can be part of the type system, since DIEs for
+ /// the type system can be shared across CUs and the mappings are
+ /// kept in DwarfDebug.
+ void insertDIE(DIDescriptor Desc, DIE *D);
/// addDie - Adds or interns the DIE to the compile unit.
///
- void addDie(DIE *Buffer) {
- this->CUDie->addChild(Buffer);
- }
-
- // getIndexTyDie - Get an anonymous type for index type.
- DIE *getIndexTyDie() {
- return IndexTyDie;
- }
-
- // setIndexTyDie - Set D as anonymous type for index which can be reused
- // later.
- void setIndexTyDie(DIE *D) {
- IndexTyDie = D;
- }
+ void addDie(DIE *Buffer) { CUDie->addChild(Buffer); }
/// addFlag - Add a flag that is true to the DIE.
- void addFlag(DIE *Die, unsigned Attribute);
+ void addFlag(DIE *Die, dwarf::Attribute Attribute);
/// addUInt - Add an unsigned integer attribute data and value.
///
- void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+ void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
+ uint64_t Integer);
+
+ void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer);
  /// addSInt - Add a signed integer attribute data and value.
///
- void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+ void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
+ int64_t Integer);
+
+ void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer);
/// addString - Add a string attribute data and value.
///
- void addString(DIE *Die, unsigned Attribute, const StringRef Str);
+ void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str);
/// addLocalString - Add a string attribute data and value.
///
- void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str);
+ void addLocalString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str);
/// addExpr - Add a Dwarf expression attribute data and value.
///
- void addExpr(DIE *Die, unsigned Attribute, unsigned Form,
- const MCExpr *Expr);
+ void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr);
/// addLabel - Add a Dwarf label attribute data and value.
///
- void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form,
const MCSymbol *Label);
+ void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label);
+
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
///
- void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label);
+ void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label);
/// addOpAddress - Add a dwarf op address data and value using the
/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
///
- void addOpAddress(DIE *Die, const MCSymbol *Label);
- void addOpAddress(DIE *Die, const MCSymbolRefExpr *Label);
+ void addOpAddress(DIEBlock *Die, const MCSymbol *Label);
/// addDelta - Add a label delta attribute data and value.
///
- void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
- const MCSymbol *Hi, const MCSymbol *Lo);
+ void addDelta(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Hi,
+ const MCSymbol *Lo);
/// addDIEEntry - Add a DIE attribute data and value.
///
- void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+ void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry);
/// addBlock - Add block data.
///
- void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+ void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block);
/// addSourceLine - Add location information to specified debug information
/// entry.
@@ -259,8 +247,8 @@ public:
/// addAddress - Add an address attribute to a die based on the location
/// provided.
- void addAddress(DIE *Die, unsigned Attribute,
- const MachineLocation &Location, bool Indirect = false);
+ void addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location,
+ bool Indirect = false);
/// addConstantValue - Add constant value entry in variable DIE.
void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
@@ -275,17 +263,17 @@ public:
void addTemplateParams(DIE &Buffer, DIArray TParams);
/// addRegisterOp - Add register operand.
- void addRegisterOp(DIE *TheDie, unsigned Reg);
+ void addRegisterOp(DIEBlock *TheDie, unsigned Reg);
/// addRegisterOffset - Add register offset.
- void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset);
+ void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset);
/// addComplexAddress - Start with the address based on the location provided,
/// and generate the DWARF information necessary to find the actual variable
/// (navigating the extra location information encoded in the type) based on
/// the starting location. Add the DWARF information to the die.
///
- void addComplexAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute,
+ void addComplexAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute,
const MachineLocation &Location);
// FIXME: Should be reformulated in terms of addComplexAddress.
@@ -295,7 +283,7 @@ public:
/// starting location. Add the DWARF information to the die. Obsolete,
/// please use addComplexAddress instead.
///
- void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute,
+ void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute,
const MachineLocation &Location);
/// addVariableAddress - Add DW_AT_location attribute for a
@@ -303,13 +291,10 @@ public:
void addVariableAddress(const DbgVariable &DV, DIE *Die,
MachineLocation Location);
- /// addToContextOwner - Add Die into the list of its context owner's children.
- void addToContextOwner(DIE *Die, DIDescriptor Context);
-
/// addType - Add a new type attribute to the specified entity. This takes
  /// an attribute parameter because DW_AT_friend attributes are also
/// type references.
- void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type);
+ void addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type);
/// getOrCreateNameSpace - Create a DIE for DINameSpace.
DIE *getOrCreateNameSpace(DINameSpace NS);
@@ -321,66 +306,103 @@ public:
/// given DIType.
DIE *getOrCreateTypeDIE(const MDNode *N);
- /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
- /// for the given DITemplateTypeParameter.
- DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+ /// getOrCreateContextDIE - Get context owner's DIE.
+ DIE *getOrCreateContextDIE(DIScope Context);
- /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create
- /// new DIE for the given DITemplateValueParameter.
- DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+ /// createGlobalVariableDIE - create global variable DIE.
+ void createGlobalVariableDIE(DIGlobalVariable GV);
- /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
- /// information entry.
- DIEEntry *createDIEEntry(DIE *Entry);
+ /// constructContainingTypeDIEs - Construct DIEs for types that contain
+ /// vtables.
+ void constructContainingTypeDIEs();
- /// createGlobalVariableDIE - create global variable DIE.
- void createGlobalVariableDIE(const MDNode *N);
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract);
+
+ /// Create a DIE with the given Tag, add the DIE to its parent, and
+ /// call insertDIE if MD is not null.
+ DIE *createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor());
+
+ /// Compute the size of a header for this unit, not including the initial
+ /// length field.
+ unsigned getHeaderSize() const {
+ return sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+ }
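
For reference, a standalone sketch (not part of this patch; names are illustrative): the unit header that follows the initial length field is a 2-byte DWARF version, a 4-byte offset into the abbreviations section, and a 1-byte address size, i.e. 7 bytes for 32-bit DWARF.

    #include <cassert>
    #include <cstdint>

    // Size of a DWARF compile-unit header after the initial length field:
    // 2-byte version + 4-byte .debug_abbrev offset + 1-byte address size.
    constexpr unsigned headerSizeAfterLength() {
      return sizeof(int16_t) + sizeof(int32_t) + sizeof(int8_t); // 2 + 4 + 1
    }

    int main() {
      assert(headerSizeAfterLength() == 7);
      return 0;
    }
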
- void addPubTypes(DISubprogram SP);
+ /// Emit the header for this unit, not including the initial length field.
+ void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym);
+private:
/// constructTypeDIE - Construct basic type die from DIBasicType.
- void constructTypeDIE(DIE &Buffer,
- DIBasicType BTy);
+ void constructTypeDIE(DIE &Buffer, DIBasicType BTy);
/// constructTypeDIE - Construct derived type die from DIDerivedType.
- void constructTypeDIE(DIE &Buffer,
- DIDerivedType DTy);
+ void constructTypeDIE(DIE &Buffer, DIDerivedType DTy);
/// constructTypeDIE - Construct type DIE from DICompositeType.
- void constructTypeDIE(DIE &Buffer,
- DICompositeType CTy);
+ void constructTypeDIE(DIE &Buffer, DICompositeType CTy);
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
- void constructArrayTypeDIE(DIE &Buffer,
- DICompositeType *CTy);
+ void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy);
/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
- DIE *constructEnumTypeDIE(DIEnumerator ETy);
+ void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy);
- /// constructContainingTypeDIEs - Construct DIEs for types that contain
- /// vtables.
- void constructContainingTypeDIEs();
+ /// constructMemberDIE - Construct member DIE from DIDerivedType.
+ void constructMemberDIE(DIE &Buffer, DIDerivedType DT);
- /// constructVariableDIE - Construct a DIE for the given DbgVariable.
- DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract);
+ /// constructTemplateTypeParameterDIE - Construct new DIE for the given
+ /// DITemplateTypeParameter.
+ void constructTemplateTypeParameterDIE(DIE &Buffer,
+ DITemplateTypeParameter TP);
- /// createMemberDIE - Create new member DIE.
- DIE *createMemberDIE(DIDerivedType DT);
+ /// constructTemplateValueParameterDIE - Construct new DIE for the given
+ /// DITemplateValueParameter.
+ void constructTemplateValueParameterDIE(DIE &Buffer,
+ DITemplateValueParameter TVP);
- /// createStaticMemberDIE - Create new static data member DIE.
- DIE *createStaticMemberDIE(DIDerivedType DT);
+ /// getOrCreateStaticMemberDIE - Create new static data member DIE.
+ DIE *getOrCreateStaticMemberDIE(DIDerivedType DT);
- /// getOrCreateContextDIE - Get context owner's DIE.
- DIE *getOrCreateContextDIE(DIDescriptor Context);
+ /// Offset of the CUDie from beginning of debug info section.
+ unsigned DebugInfoOffset;
-private:
+  /// getDefaultLowerBound - Return the default lower bound for an array. If the
+ /// DWARF version doesn't handle the language, return -1.
+ int64_t getDefaultLowerBound() const;
- // DIEValueAllocator - All DIEValues are allocated through this allocator.
- BumpPtrAllocator DIEValueAllocator;
- DIEInteger *DIEIntegerOne;
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) const {
+ return MDNodeToDIEEntryMap.lookup(N);
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() { return IndexTyDie; }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) { IndexTyDie = D; }
+
+ /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+ /// information entry.
+ DIEEntry *createDIEEntry(DIE *Entry);
+
+ /// resolve - Look in the DwarfDebug map for the MDNode that
+ /// corresponds to the reference.
+ template <typename T> T resolve(DIRef<T> Ref) const {
+ return DD->resolve(Ref);
+ }
};
} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 979c0c3..24e2c05 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "dwarfdebug"
#include "DwarfDebug.h"
#include "DIE.h"
+#include "DIEHash.h"
#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "llvm/ADT/STLExtras.h"
@@ -34,6 +35,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/MD5.h"
@@ -57,15 +59,20 @@ static cl::opt<bool> UnknownLocations(
cl::init(false));
static cl::opt<bool>
-GenerateDwarfPubNamesSection("generate-dwarf-pubnames", cl::Hidden,
- cl::init(false),
- cl::desc("Generate DWARF pubnames section"));
-
-static cl::opt<bool>
GenerateODRHash("generate-odr-hash", cl::Hidden,
cl::desc("Add an ODR hash to external type DIEs."),
cl::init(false));
+static cl::opt<bool>
+GenerateCUHash("generate-cu-hash", cl::Hidden,
+ cl::desc("Add the CU hash as the dwo_id."),
+ cl::init(false));
+
+static cl::opt<bool>
+GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
+ cl::desc("Generate GNU-style pubnames and pubtypes"),
+ cl::init(false));
+
namespace {
enum DefaultOnOff {
Default,
@@ -83,14 +90,6 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
cl::init(Default));
static cl::opt<DefaultOnOff>
-DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
- cl::desc("Compatibility with Darwin gdb."),
- cl::values(clEnumVal(Default, "Default for platform"),
- clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled"), clEnumValEnd),
- cl::init(Default));
-
-static cl::opt<DefaultOnOff>
SplitDwarf("split-dwarf", cl::Hidden,
cl::desc("Output prototype dwarf split debug info."),
cl::values(clEnumVal(Default, "Default for platform"),
@@ -98,16 +97,16 @@ SplitDwarf("split-dwarf", cl::Hidden,
clEnumVal(Disable, "Disabled"), clEnumValEnd),
cl::init(Default));
-namespace {
- const char *const DWARFGroupName = "DWARF Emission";
- const char *const DbgTimerName = "DWARF Debug Writer";
+static cl::opt<DefaultOnOff>
+DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
+ cl::desc("Generate DWARF pubnames and pubtypes sections"),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ cl::init(Default));
- struct CompareFirst {
- template <typename T> bool operator()(const T &lhs, const T &rhs) const {
- return lhs.first < rhs.first;
- }
- };
-} // end anonymous namespace
+static const char *const DWARFGroupName = "DWARF Emission";
+static const char *const DbgTimerName = "DWARF Debug Writer";
//===----------------------------------------------------------------------===//
@@ -117,6 +116,13 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
namespace llvm {
+/// resolve - Look in the DwarfDebug map for the MDNode that
+/// corresponds to the reference.
+template <typename T>
+T DbgVariable::resolve(DIRef<T> Ref) const {
+ return DD->resolve(Ref);
+}
+
DIType DbgVariable::getType() const {
DIType Ty = Var.getType();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
@@ -147,21 +153,16 @@ DIType DbgVariable::getType() const {
the pointers and __Block_byref_x_VarName struct to find the actual
value of the variable. The function addBlockByrefType does this. */
DIType subType = Ty;
- unsigned tag = Ty.getTag();
+ uint16_t tag = Ty.getTag();
- if (tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy = DIDerivedType(Ty);
- subType = DTy.getTypeDerivedFrom();
- }
-
- DICompositeType blockStruct = DICompositeType(subType);
- DIArray Elements = blockStruct.getTypeArray();
+ if (tag == dwarf::DW_TAG_pointer_type)
+ subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom());
+ DIArray Elements = DICompositeType(subType).getTypeArray();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- DIDerivedType DT = DIDerivedType(Element);
+ DIDerivedType DT(Elements.getElement(i));
if (getName() == DT.getName())
- return (DT.getTypeDerivedFrom());
+ return (resolve(DT.getTypeDerivedFrom()));
}
}
return Ty;
@@ -182,10 +183,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
AbbreviationsSet(InitAbbreviationsSetSize),
SourceIdMap(DIEValueAllocator),
PrevLabel(NULL), GlobalCUIndexCount(0),
- InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string",
+ InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string",
DIEValueAllocator),
SkeletonAbbrevSet(InitAbbreviationsSetSize),
- SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string",
+ SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string",
DIEValueAllocator) {
DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
@@ -195,29 +196,24 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
- // Turn on accelerator tables and older gdb compatibility
- // for Darwin.
+ // Turn on accelerator tables for Darwin by default, pubnames by
+ // default for non-Darwin, and handle split dwarf.
bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin();
- if (DarwinGDBCompat == Default) {
- if (IsDarwin)
- IsDarwinGDBCompat = true;
- else
- IsDarwinGDBCompat = false;
- } else
- IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
- if (DwarfAccelTables == Default) {
- if (IsDarwin)
- HasDwarfAccelTables = true;
- else
- HasDwarfAccelTables = false;
- } else
- HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+ if (DwarfAccelTables == Default)
+ HasDwarfAccelTables = IsDarwin;
+ else
+ HasDwarfAccelTables = DwarfAccelTables == Enable;
if (SplitDwarf == Default)
HasSplitDwarf = false;
else
- HasSplitDwarf = SplitDwarf == Enable ? true : false;
+ HasSplitDwarf = SplitDwarf == Enable;
+
+ if (DwarfPubSections == Default)
+ HasDwarfPubSections = !IsDarwin;
+ else
+ HasDwarfPubSections = DwarfPubSections == Enable;
DwarfVersion = getDwarfVersionFromModule(MMI->getModule());
@@ -226,8 +222,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
beginModule();
}
}
-DwarfDebug::~DwarfDebug() {
-}
// Switch to the specified MCSection and emit an assembler
// temporary label to it if SymbolStem is specified.
@@ -285,10 +279,10 @@ void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
// If it's newly added.
if (InSet == &Abbrev) {
// Add to abbreviation list.
- Abbreviations->push_back(&Abbrev);
+ Abbreviations.push_back(&Abbrev);
// Assign the vector position + 1 as its number.
- Abbrev.setNumber(Abbreviations->size());
+ Abbrev.setNumber(Abbreviations.size());
} else {
// Assign existing abbreviation number.
Abbrev.setNumber(InSet->getNumber());
@@ -302,12 +296,7 @@ static bool isObjCClass(StringRef Name) {
static bool hasObjCCategory(StringRef Name) {
if (!isObjCClass(Name)) return false;
- size_t pos = Name.find(')');
- if (pos != std::string::npos) {
- if (Name[pos+1] != ' ') return false;
- return true;
- }
- return false;
+ return Name.find(") ") != StringRef::npos;
}
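
The simplified check relies on a category reference such as "-[Class(Category) method]" always containing the two-character sequence ") ". A standalone sketch comparing the old and new tests, assuming the isObjCClass precondition already holds; the names below are illustrative, not the LLVM API.

    #include <cassert>
    #include <string>

    // Old form: find ')' and require the next character to be a space.
    static bool hasCategoryOld(const std::string &Name) {
      size_t Pos = Name.find(')');
      return Pos != std::string::npos && Pos + 1 < Name.size() &&
             Name[Pos + 1] == ' ';
    }

    // New form: a category selector always contains ") " somewhere.
    static bool hasCategoryNew(const std::string &Name) {
      return Name.find(") ") != std::string::npos;
    }

    int main() {
      const std::string WithCategory = "-[Class(Category) method]";
      const std::string WithoutCategory = "-[Class method]";
      assert(hasCategoryOld(WithCategory) && hasCategoryNew(WithCategory));
      assert(!hasCategoryOld(WithoutCategory) && !hasCategoryNew(WithoutCategory));
      return 0;
    }
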
static void getObjCClassCategory(StringRef In, StringRef &Class,
@@ -327,11 +316,20 @@ static StringRef getObjCMethodName(StringRef In) {
return In.slice(In.find(' ') + 1, In.find(']'));
}
+// Helper for sorting sections into a stable output order.
+static bool SectionSort(const MCSection *A, const MCSection *B) {
+ std::string LA = (A ? A->getLabelBeginName() : "");
+ std::string LB = (B ? B->getLabelBeginName() : "");
+ return LA < LB;
+}
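
SectionSort compares only the begin-label names, treating a null section as the empty string so it sorts first; the point is a stable section order across runs. A self-contained sketch of the same idea with a stand-in Section type (MCSection is not used here):

    #include <algorithm>
    #include <string>
    #include <vector>

    // Stand-in for a section: only the begin-label name matters for ordering.
    struct Section { std::string LabelBegin; };

    // Null sections sort as "" and therefore come first.
    static bool SectionLess(const Section *A, const Section *B) {
      std::string LA = A ? A->LabelBegin : "";
      std::string LB = B ? B->LabelBegin : "";
      return LA < LB;
    }

    int main() {
      Section Text{"Lsection_text"}, Data{"Lsection_data"};
      std::vector<const Section *> Sections = {&Text, nullptr, &Data};
      std::sort(Sections.begin(), Sections.end(), SectionLess);
      // Order is now: nullptr, &Data, &Text -- deterministic across runs.
      return 0;
    }
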
+
// Add the various names to the Dwarf accelerator table names.
+// TODO: Determine whether or not we should add names for programs
+// that do not have a DW_AT_name or DW_AT_linkage_name field - this
+// is only slightly different than the lookup of non-standard ObjC names.
static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
DIE* Die) {
if (!SP.isDefinition()) return;
-
TheCU->addAccelName(SP.getName(), Die);
// If the linkage name is different than the name, go ahead and output
@@ -352,30 +350,34 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
}
}
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
+ if (!Context)
+ return false;
+ DIDescriptor D(Context);
+ if (D.isSubprogram())
+ return true;
+ if (D.isType())
+ return isSubprogramContext(resolve(DIType(Context).getContext()));
+ return false;
+}
+
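
The recursion walks outward through enclosing type contexts until it reaches a subprogram or runs out of parents. A minimal stand-alone model of that walk; the Context struct below is illustrative, not a real debug-info node:

    #include <cassert>

    // Each node knows whether it is a subprogram and who its parent context is.
    struct Context {
      bool IsSubprogram;
      const Context *Parent;
    };

    // True if Ctx is a subprogram or nested (transitively) inside one.
    static bool isInsideSubprogram(const Context *Ctx) {
      if (!Ctx)
        return false;
      if (Ctx->IsSubprogram)
        return true;
      return isInsideSubprogram(Ctx->Parent);
    }

    int main() {
      Context SP{true, nullptr};         // a subprogram
      Context LocalType{false, &SP};     // a type defined inside it
      Context FileScope{false, nullptr}; // a file-level type
      assert(isInsideSubprogram(&LocalType));
      assert(!isInsideSubprogram(&FileScope));
      return 0;
    }
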
// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
// and DW_AT_high_pc attributes. If there are global variables in this
// scope then create and insert DIEs for these variables.
-DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
- const MDNode *SPNode) {
- DIE *SPDie = SPCU->getDIE(SPNode);
+DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) {
+ DIE *SPDie = SPCU->getDIE(SP);
assert(SPDie && "Unable to find subprogram DIE!");
- DISubprogram SP(SPNode);
// If we're updating an abstract DIE, then we will be adding the children and
// object pointer later on. But what we don't want to do is process the
// concrete DIE twice.
- DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
- if (AbsSPDIE) {
- bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) {
// Pick up abstract subprogram DIE.
- SPDie = new DIE(dwarf::DW_TAG_subprogram);
- // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
- // DW_FORM_ref4.
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
- InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
- AbsSPDIE);
- SPCU->addDie(SPDie);
+ SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie());
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE);
} else {
DISubprogram SPDecl = SP.getFunctionDeclaration();
if (!SPDecl.isSubprogram()) {
@@ -384,32 +386,31 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// function then gdb prefers the definition at top level and but does not
// expect specification DIE in parent function. So avoid creating
// specification DIE for a function defined inside a function.
- if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
- !SP.getContext().isFile() &&
- !isSubprogramContext(SP.getContext())) {
+ DIScope SPContext = resolve(SP.getContext());
+ if (SP.isDefinition() && !SPContext.isCompileUnit() &&
+ !SPContext.isFile() &&
+ !isSubprogramContext(SPContext)) {
SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
// Add arguments.
DICompositeType SPTy = SP.getType();
DIArray Args = SPTy.getTypeArray();
- unsigned SPTag = SPTy.getTag();
+ uint16_t SPTag = SPTy.getTag();
if (SPTag == dwarf::DW_TAG_subroutine_type)
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(Args.getElement(i));
+ DIE *Arg =
+ SPCU->createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie);
+ DIType ATy(Args.getElement(i));
SPCU->addType(Arg, ATy);
if (ATy.isArtificial())
SPCU->addFlag(Arg, dwarf::DW_AT_artificial);
if (ATy.isObjectPointer())
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer,
- dwarf::DW_FORM_ref4, Arg);
- SPDie->addChild(Arg);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg);
}
DIE *SPDeclDie = SPDie;
- SPDie = new DIE(dwarf::DW_TAG_subprogram);
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification,
- dwarf::DW_FORM_ref4, SPDeclDie);
- SPCU->addDie(SPDie);
+ SPDie =
+ SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie());
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie);
}
}
}
@@ -431,18 +432,39 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
return SPDie;
}
+/// Check whether we should create a DIE for the given Scope, return true
+/// if we don't create a DIE (the corresponding DIE is null).
+bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
+ if (Scope->isAbstractScope())
+ return false;
+
+ // We don't create a DIE if there is no Range.
+ const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return true;
+
+ if (Ranges.size() > 1)
+ return false;
+
+ // We don't create a DIE if we have a single Range and the end label
+ // is null.
+ SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin();
+ MCSymbol *End = getLabelAfterInsn(RI->second);
+ return !End;
+}
+
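
In words: abstract scopes always get a DIE; a concrete scope is skipped when it covers no instruction ranges, or covers exactly one range whose end label was never emitted. A self-contained model of that predicate, with labels reduced to nullable pointers (illustrative types only):

    #include <cassert>
    #include <utility>
    #include <vector>

    struct Label {};

    static bool scopeDIEIsNull(
        bool IsAbstract,
        const std::vector<std::pair<Label *, Label *>> &Ranges) {
      if (IsAbstract)
        return false;
      if (Ranges.empty())
        return true;
      if (Ranges.size() > 1)
        return false;
      return Ranges.front().second == nullptr; // single range, missing end label
    }

    int main() {
      Label Begin, End;
      assert(scopeDIEIsNull(false, {}));                  // no ranges
      assert(scopeDIEIsNull(false, {{&Begin, nullptr}})); // end label is null
      assert(!scopeDIEIsNull(false, {{&Begin, &End}}));   // a real range
      assert(!scopeDIEIsNull(true, {}));                  // abstract scope
      return 0;
    }
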
// Construct new DW_TAG_lexical_block for this scope and attach
// DW_AT_low_pc/DW_AT_high_pc labels.
DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
+ if (isLexicalScopeDIENull(Scope))
+ return 0;
+
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
- if (Ranges.empty())
- return 0;
-
// If we have multiple ranges, emit them into the range section.
if (Ranges.size() > 1) {
// .debug_range section has not been laid out yet. Emit offset in
@@ -467,8 +489,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin();
MCSymbol *Start = getLabelBeforeInsn(RI->first);
MCSymbol *End = getLabelAfterInsn(RI->second);
-
- if (End == 0) return 0;
+ assert(End && "End label should not be null!");
assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
assert(End->isDefined() && "Invalid end label for an inlined scope!");
@@ -498,8 +519,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
}
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
- TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, OriginDIE);
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE);
if (Ranges.size() > 1) {
// .debug_range section has not been laid out yet. Emit offset in
@@ -535,26 +555,10 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
// Add the call site information to the DIE.
DILocation DL(Scope->getInlinedAt());
- TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
TheCU->getUniqueID()));
- TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
-
- // Track the start label for this inlined function.
- //.debug_inlined section specification does not clearly state how
- // to emit inlined scopes that are split into multiple instruction ranges.
- // For now, use the first instruction range and emit low_pc/high_pc pair and
- // corresponding the .debug_inlined section entry for this pair.
- if (Asm->MAI->doesDwarfUseInlineInfoSection()) {
- MCSymbol *StartLabel = getLabelBeforeInsn(Ranges.begin()->first);
- InlineInfoMap::iterator I = InlineInfo.find(InlinedSP);
-
- if (I == InlineInfo.end()) {
- InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
- InlinedSPNodes.push_back(InlinedSP);
- } else
- I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
- }
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
@@ -563,26 +567,16 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
return ScopeDIE;
}
-// Construct a DIE for this scope.
-DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
- if (!Scope || !Scope->getScopeNode())
- return NULL;
-
- DIScope DS(Scope->getScopeNode());
- // Early return to avoid creating dangling variable|scope DIEs.
- if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() &&
- !TheCU->getDIE(DS))
- return NULL;
-
- SmallVector<DIE *, 8> Children;
- DIE *ObjectPointer = NULL;
+DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope,
+ SmallVectorImpl<DIE*> &Children) {
+ DIE *ObjectPointer = NULL;
// Collect arguments for current function.
if (LScopes.isCurrentFunctionScope(Scope))
for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
if (DbgVariable *ArgDV = CurrentFnArguments[i])
if (DIE *Arg =
- TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) {
+ TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) {
Children.push_back(Arg);
if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
}
@@ -591,7 +585,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope);
for (unsigned i = 0, N = Variables.size(); i < N; ++i)
if (DIE *Variable =
- TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) {
+ TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) {
Children.push_back(Variable);
if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
}
@@ -599,6 +593,23 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
Children.push_back(Nested);
+ return ObjectPointer;
+}
+
+// Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
+ if (!Scope || !Scope->getScopeNode())
+ return NULL;
+
+ DIScope DS(Scope->getScopeNode());
+
+ SmallVector<DIE *, 8> Children;
+ DIE *ObjectPointer = NULL;
+ bool ChildrenCreated = false;
+
+ // We try to create the scope DIE first, then the children DIEs. This will
+  // avoid creating unused children and then removing them later when we find
+  // out the scope DIE is null.
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
@@ -609,26 +620,41 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
// Note down abstract DIE.
if (ScopeDIE)
AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
- }
- else
- ScopeDIE = updateSubprogramScopeDIE(TheCU, DS);
- }
- else {
+ } else
+ ScopeDIE = updateSubprogramScopeDIE(TheCU, DISubprogram(DS));
+ } else {
+ // Early exit when we know the scope DIE is going to be null.
+ if (isLexicalScopeDIENull(Scope))
+ return NULL;
+
+ // We create children here when we know the scope DIE is not going to be
+ // null and the children will be added to the scope DIE.
+ ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children);
+ ChildrenCreated = true;
+
// There is no need to emit empty lexical block DIE.
std::pair<ImportedEntityMap::const_iterator,
ImportedEntityMap::const_iterator> Range = std::equal_range(
ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0),
- CompareFirst());
+ less_first());
if (Children.empty() && Range.first == Range.second)
return NULL;
ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
+ assert(ScopeDIE && "Scope DIE should not be null.");
for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second;
++i)
constructImportedEntityDIE(TheCU, i->second, ScopeDIE);
}
- if (!ScopeDIE) return NULL;
+ if (!ScopeDIE) {
+ assert(Children.empty() &&
+ "We create children only when the scope DIE is not null.");
+ return NULL;
+ }
+ if (!ChildrenCreated)
+ // We create children when the scope DIE is not null.
+ ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children);
// Add children
for (SmallVectorImpl<DIE *>::iterator I = Children.begin(),
@@ -636,8 +662,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
ScopeDIE->addChild(*I);
if (DS.isSubprogram() && ObjectPointer != NULL)
- TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
- dwarf::DW_FORM_ref4, ObjectPointer);
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer);
if (DS.isSubprogram())
TheCU->addPubTypes(DISubprogram(DS));
@@ -653,8 +678,10 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
StringRef DirName, unsigned CUID) {
// If we use .loc in assembly, we can't separate .file entries according to
// compile units. Thus all files will belong to the default compile unit.
- if (Asm->TM.hasMCUseLoc() &&
- Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+
+ // FIXME: add a better feature test than hasRawTextSupport. Even better,
+ // extend .file to support this.
+ if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport())
CUID = 0;
// If FE did not provide a file name, then assume stdin.
@@ -689,14 +716,12 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
// Create new CompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
-CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
- DICompileUnit DIUnit(N);
+CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) {
StringRef FN = DIUnit.getFilename();
CompilationDir = DIUnit.getDirectory();
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
- DIUnit.getLanguage(), Die, N, Asm,
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm,
this, &InfoHolder);
FileIDCUMap[NewCU->getUniqueID()] = 0;
@@ -723,31 +748,57 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
// Use a single line table if we are using .loc and generating assembly.
bool UseTheFirstCU =
- (Asm->TM.hasMCUseLoc() &&
- Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) ||
- (NewCU->getUniqueID() == 0);
+ (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) ||
+ (NewCU->getUniqueID() == 0);
- // DW_AT_stmt_list is a offset of line number information for this
- // compile unit in debug_line section. For split dwarf this is
- // left in the skeleton CU and so not included.
- // The line table entries are not always emitted in assembly, so it
- // is not okay to use line_table_start here.
if (!useSplitDwarf()) {
+    // DW_AT_stmt_list is an offset of line number information for this
+ // compile unit in debug_line section. For split dwarf this is
+ // left in the skeleton CU and so not included.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- UseTheFirstCU ?
- Asm->GetTempSymbol("section_line") : LineTableStartSym);
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
+ UseTheFirstCU ? Asm->GetTempSymbol("section_line")
+ : LineTableStartSym);
else if (UseTheFirstCU)
NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
else
NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
LineTableStartSym, DwarfLineSectionSym);
+
+ // If we're using split dwarf the compilation dir is going to be in the
+ // skeleton CU and so we don't need to duplicate it here.
+ if (!CompilationDir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+
+ // Flags to let the linker know we have emitted new style pubnames. Only
+ // emit it here if we don't have a skeleton CU for split dwarf.
+ if (GenerateGnuPubSections) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames,
+ dwarf::DW_FORM_sec_offset,
+ Asm->GetTempSymbol("gnu_pubnames",
+ NewCU->getUniqueID()));
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("gnu_pubnames",
+ NewCU->getUniqueID()),
+ DwarfGnuPubNamesSectionSym);
+
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes,
+ dwarf::DW_FORM_sec_offset,
+ Asm->GetTempSymbol("gnu_pubtypes",
+ NewCU->getUniqueID()));
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("gnu_pubtypes",
+ NewCU->getUniqueID()),
+ DwarfGnuPubTypesSectionSym);
+ }
}
- // If we're using split dwarf the compilation dir is going to be in the
- // skeleton CU and so we don't need to duplicate it here.
- if (!useSplitDwarf() && !CompilationDir.empty())
- NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
if (DIUnit.isOptimized())
NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
@@ -764,13 +815,17 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
InfoHolder.addUnit(NewCU);
- CUMap.insert(std::make_pair(N, NewCU));
+ CUMap.insert(std::make_pair(DIUnit, NewCU));
+ CUDieMap.insert(std::make_pair(Die, NewCU));
return NewCU;
}
// Construct subprogram DIE.
-void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
- const MDNode *N) {
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) {
+  // FIXME: We should only call this routine once; however, during LTO, if a
+ // program is defined in multiple CUs we could end up calling it out of
+ // beginModule as we walk the CUs.
+
CompileUnit *&CURef = SPMap[N];
if (CURef)
return;
@@ -784,15 +839,8 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP);
- // Add to map.
- TheCU->insertDIE(N, SubprogramDie);
-
- // Add to context owner.
- TheCU->addToContextOwner(SubprogramDie, SP.getContext());
-
- // Expose as global, if requested.
- if (GenerateDwarfPubNamesSection)
- TheCU->addGlobalName(SP.getName(), SubprogramDie);
+ // Expose as a global name.
+ TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext()));
}
void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU,
@@ -833,10 +881,9 @@ void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU,
unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(),
Module.getContext().getDirectory(),
TheCU->getUniqueID());
- TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID);
- TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber());
- TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4,
- EntityDie);
+ TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, None, FileID);
+ TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, None, Module.getLineNumber());
+ TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie);
StringRef Name = Module.getName();
if (!Name.empty())
TheCU->addString(IMDie, dwarf::DW_AT_name, Name);
@@ -857,6 +904,7 @@ void DwarfDebug::beginModule() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes)
return;
+ TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes);
// Emit initial sections so we can reference labels later.
emitSectionLabels();
@@ -870,10 +918,10 @@ void DwarfDebug::beginModule() {
DIImportedEntity(ImportedEntities.getElement(i)).getContext(),
ImportedEntities.getElement(i)));
std::sort(ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(), CompareFirst());
+ ScopesWithImportedEntities.end(), less_first());
DIArray GVs = CUNode.getGlobalVariables();
for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU->createGlobalVariableDIE(GVs.getElement(i));
+ CU->createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
DIArray SPs = CUNode.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
constructSubprogramDIE(CU, SPs.getElement(i));
@@ -887,22 +935,13 @@ void DwarfDebug::beginModule() {
// available.
for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
constructImportedEntityDIE(CU, ImportedEntities.getElement(i));
- // If we're splitting the dwarf out now that we've got the entire
- // CU then construct a skeleton CU based upon it.
- if (useSplitDwarf()) {
- // This should be a unique identifier when we want to build .dwp files.
- CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
- dwarf::DW_FORM_data8, 0);
- // Now construct the skeleton CU associated.
- constructSkeletonCU(CUNode);
- }
}
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
// Prime section data.
- SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+ SectionMap[Asm->getObjFileLowering().getTextSection()];
}
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
@@ -911,21 +950,20 @@ void DwarfDebug::computeInlinedDIEs() {
for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
DIE *ISP = *AI;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
}
for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
AE = AbstractSPDies.end(); AI != AE; ++AI) {
DIE *ISP = AI->second;
if (InlinedSubprogramDIEs.count(ISP))
continue;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
}
}
// Collect info for variables that were optimized out.
void DwarfDebug::collectDeadVariables() {
const Module *M = MMI->getModule();
- DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
@@ -933,37 +971,38 @@ void DwarfDebug::collectDeadVariables() {
DIArray Subprograms = TheCU.getSubprograms();
for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
DISubprogram SP(Subprograms.getElement(i));
- if (ProcessedSPNodes.count(SP) != 0) continue;
- if (!SP.isSubprogram()) continue;
- if (!SP.isDefinition()) continue;
+ if (ProcessedSPNodes.count(SP) != 0)
+ continue;
+ if (!SP.isSubprogram())
+ continue;
+ if (!SP.isDefinition())
+ continue;
DIArray Variables = SP.getVariables();
- if (Variables.getNumElements() == 0) continue;
-
- LexicalScope *Scope =
- new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
- DeadFnScopeMap[SP] = Scope;
+ if (Variables.getNumElements() == 0)
+ continue;
// Construct subprogram DIE and add variables DIEs.
CompileUnit *SPCU = CUMap.lookup(TheCU);
assert(SPCU && "Unable to find Compile Unit!");
+ // FIXME: See the comment in constructSubprogramDIE about duplicate
+ // subprogram DIEs.
constructSubprogramDIE(SPCU, SP);
- DIE *ScopeDIE = SPCU->getDIE(SP);
+ DIE *SPDIE = SPCU->getDIE(SP);
for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
DIVariable DV(Variables.getElement(vi));
- if (!DV.isVariable()) continue;
- DbgVariable NewVar(DV, NULL);
+ if (!DV.isVariable())
+ continue;
+ DbgVariable NewVar(DV, NULL, this);
if (DIE *VariableDIE =
- SPCU->constructVariableDIE(&NewVar, Scope->isAbstractScope()))
- ScopeDIE->addChild(VariableDIE);
+ SPCU->constructVariableDIE(NewVar, false))
+ SPDIE->addChild(VariableDIE);
}
}
}
}
- DeleteContainerSeconds(DeadFnScopeMap);
}
-// Type Signature [7.27] computation code.
-typedef ArrayRef<uint8_t> HashValue;
+// Type Signature [7.27] and ODR Hash code.
/// \brief Grabs the string in whichever attribute is passed in and returns
/// a reference to it. Returns "" if the attribute doesn't exist.
@@ -976,100 +1015,6 @@ static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) {
return StringRef("");
}
-/// \brief Adds the string in \p Str to the hash in \p Hash. This also hashes
-/// a trailing NULL with the string.
-static void addStringToHash(MD5 &Hash, StringRef Str) {
- DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
- Hash.update(Str);
- Hash.update(makeArrayRef((uint8_t)'\0'));
-}
-
-// FIXME: These are copied and only slightly modified out of LEB128.h.
-
-/// \brief Adds the unsigned in \p N to the hash in \p Hash. This also encodes
-/// the unsigned as a ULEB128.
-static void addULEB128ToHash(MD5 &Hash, uint64_t Value) {
- DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
- do {
- uint8_t Byte = Value & 0x7f;
- Value >>= 7;
- if (Value != 0)
- Byte |= 0x80; // Mark this byte to show that more bytes will follow.
- Hash.update(Byte);
- } while (Value != 0);
-}
-
-/// \brief Including \p Parent adds the context of Parent to \p Hash.
-static void addParentContextToHash(MD5 &Hash, DIE *Parent) {
-
- DEBUG(dbgs() << "Adding parent context to hash...\n");
-
- // [7.27.2] For each surrounding type or namespace beginning with the
- // outermost such construct...
- SmallVector<DIE *, 1> Parents;
- while (Parent->getTag() != dwarf::DW_TAG_compile_unit) {
- Parents.push_back(Parent);
- Parent = Parent->getParent();
- }
-
- // Reverse iterate over our list to go from the outermost construct to the
- // innermost.
- for (SmallVectorImpl<DIE *>::reverse_iterator I = Parents.rbegin(),
- E = Parents.rend();
- I != E; ++I) {
- DIE *Die = *I;
-
- // ... Append the letter "C" to the sequence...
- addULEB128ToHash(Hash, 'C');
-
- // ... Followed by the DWARF tag of the construct...
- addULEB128ToHash(Hash, Die->getTag());
-
- // ... Then the name, taken from the DW_AT_name attribute.
- StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
- DEBUG(dbgs() << "... adding context: " << Name << "\n");
- if (!Name.empty())
- addStringToHash(Hash, Name);
- }
-}
-
-/// This is based on the type signature computation given in section 7.27 of the
-/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE with
-/// the exception that we are hashing only the context and the name of the type.
-static void addDIEODRSignature(MD5 &Hash, CompileUnit *CU, DIE *Die) {
-
- // Add the contexts to the hash. We won't be computing the ODR hash for
- // function local types so it's safe to use the generic context hashing
- // algorithm here.
- // FIXME: If we figure out how to account for linkage in some way we could
- // actually do this with a slight modification to the parent hash algorithm.
- DIE *Parent = Die->getParent();
- if (Parent)
- addParentContextToHash(Hash, Parent);
-
- // Add the current DIE information.
-
- // Add the DWARF tag of the DIE.
- addULEB128ToHash(Hash, Die->getTag());
-
- // Add the name of the type to the hash.
- addStringToHash(Hash, getDIEStringAttr(Die, dwarf::DW_AT_name));
-
- // Now get the result.
- MD5::MD5Result Result;
- Hash.final(Result);
-
- // ... take the least significant 8 bytes and store those as the attribute.
- // Our MD5 implementation always returns its results in little endian, swap
- // bytes appropriately.
- uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8);
-
- // FIXME: This should be added onto the type unit, not the type, but this
- // works as an intermediate stage.
- CU->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, dwarf::DW_FORM_data8,
- Signature);
-}
-
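
The removed helper fed values into the hash as ULEB128 bytes: 7 payload bits per byte, with the high bit set on every byte except the last. That encoding now lives behind DIEHash; a standalone encoder sketch using the standard LEB128 test vectors (illustrative code, not the LLVM implementation):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Encode Value as ULEB128, least-significant group first.
    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Bytes;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // More bytes follow.
        Bytes.push_back(Byte);
      } while (Value != 0);
      return Bytes;
    }

    int main() {
      assert(encodeULEB128(0x7f) == std::vector<uint8_t>({0x7f}));
      assert(encodeULEB128(0x80) == std::vector<uint8_t>({0x80, 0x01}));
      assert(encodeULEB128(624485) == std::vector<uint8_t>({0xe5, 0x8e, 0x26}));
      return 0;
    }
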
/// Return true if the current DIE is contained within an anonymous namespace.
static bool isContainedInAnonNamespace(DIE *Die) {
DIE *Parent = Die->getParent();
@@ -1090,7 +1035,7 @@ static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) {
return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus &&
getDIEStringAttr(Die, dwarf::DW_AT_name) != "" &&
!isContainedInAnonNamespace(Die);
- }
+}
void DwarfDebug::finalizeModuleInfo() {
// Collect info for variables that were optimized out.
@@ -1099,43 +1044,102 @@ void DwarfDebug::finalizeModuleInfo() {
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
computeInlinedDIEs();
- // Emit DW_AT_containing_type attribute to connect types with their
- // vtable holding type.
- for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
- CUE = CUMap.end(); CUI != CUE; ++CUI) {
- CompileUnit *TheCU = CUI->second;
- TheCU->constructContainingTypeDIEs();
- }
-
// Split out type units and conditionally add an ODR tag to the split
// out type.
// FIXME: Do type splitting.
for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) {
- MD5 Hash;
DIE *Die = TypeUnits[i];
+ DIEHash Hash;
// If we've requested ODR hashes and it's applicable for an ODR hash then
// add the ODR signature now.
+ // FIXME: This should be added onto the type unit, not the type, but this
+ // works as an intermediate stage.
if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die))
- addDIEODRSignature(Hash, CUMap.begin()->second, Die);
+ CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature,
+ dwarf::DW_FORM_data8,
+ Hash.computeDIEODRSignature(*Die));
}
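
computeDIEODRSignature keeps the convention the removed helper spelled out: hash the flattened DIE description, then take the low 8 bytes of the 16-byte digest, read little-endian, as the 64-bit DW_AT_GNU_odr_signature value. A standalone sketch of that final extraction step, with an illustrative digest rather than a real MD5:

    #include <cassert>
    #include <cstdint>

    // Form the 64-bit signature from the last 8 bytes of a 16-byte digest,
    // interpreting them as a little-endian integer.
    static uint64_t signatureFromDigest(const uint8_t Digest[16]) {
      uint64_t Sig = 0;
      for (int i = 0; i < 8; ++i)
        Sig |= uint64_t(Digest[8 + i]) << (8 * i);
      return Sig;
    }

    int main() {
      const uint8_t Digest[16] = {0, 0, 0, 0, 0, 0, 0, 0,
                                  0xef, 0xcd, 0xab, 0x89, 0x67, 0x45, 0x23, 0x01};
      assert(signatureFromDigest(Digest) == UINT64_C(0x0123456789abcdef));
      return 0;
    }
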
- // Compute DIE offsets and sizes.
+ // Handle anything that needs to be done on a per-cu basis.
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
+ CUE = CUMap.end();
+ CUI != CUE; ++CUI) {
+ CompileUnit *TheCU = CUI->second;
+ // Emit DW_AT_containing_type attribute to connect types with their
+ // vtable holding type.
+ TheCU->constructContainingTypeDIEs();
+
+ // If we're splitting the dwarf out now that we've got the entire
+ // CU then construct a skeleton CU based upon it.
+ if (useSplitDwarf()) {
+ uint64_t ID = 0;
+ if (GenerateCUHash) {
+ DIEHash CUHash;
+ ID = CUHash.computeCUSignature(*TheCU->getCUDie());
+ }
+ // This should be a unique identifier when we want to build .dwp files.
+ TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ // Now construct the skeleton CU associated.
+ CompileUnit *SkCU = constructSkeletonCU(TheCU);
+ // This should be a unique identifier when we want to build .dwp files.
+ SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ }
+ }
+
+ // Compute DIE offsets and sizes.
InfoHolder.computeSizeAndOffsets();
if (useSplitDwarf())
SkeletonHolder.computeSizeAndOffsets();
}
void DwarfDebug::endSections() {
- // Standard sections final addresses.
- Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
- Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+ // Filter labels by section.
+ for (size_t n = 0; n < ArangeLabels.size(); n++) {
+ const SymbolCU &SCU = ArangeLabels[n];
+ if (SCU.Sym->isInSection()) {
+      // Make a note of this symbol and its section.
+ const MCSection *Section = &SCU.Sym->getSection();
+ if (!Section->getKind().isMetadata())
+ SectionMap[Section].push_back(SCU);
+ } else {
+ // Some symbols (e.g. common/bss on mach-o) can have no section but still
+ // appear in the output. This sucks as we rely on sections to build
+ // arange spans. We can do it without, but it's icky.
+ SectionMap[NULL].push_back(SCU);
+ }
+ }
- // End text sections.
- for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) {
- Asm->OutStreamer.SwitchSection(SectionMap[I]);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
+ // Build a list of sections used.
+ std::vector<const MCSection *> Sections;
+ for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end();
+ it++) {
+ const MCSection *Section = it->first;
+ Sections.push_back(Section);
+ }
+
+ // Sort the sections into order.
+ // This is only done to ensure consistent output order across different runs.
+ std::sort(Sections.begin(), Sections.end(), SectionSort);
+
+ // Add terminating symbols for each section.
+  for (unsigned ID = 0; ID < Sections.size(); ID++) {
+ const MCSection *Section = Sections[ID];
+ MCSymbol *Sym = NULL;
+
+ if (Section) {
+ // We can't call MCSection::getLabelEndName, as it's only safe to do so
+ // if we know the section name up-front. For user-created sections, the resulting
+ // label may not be valid to use as a label. (section names can use a greater
+ // set of characters on some systems)
+ Sym = Asm->GetTempSymbol("debug_end", ID);
+ Asm->OutStreamer.SwitchSection(Section);
+ Asm->OutStreamer.EmitLabel(Sym);
+ }
+
+ // Insert a final terminator.
+ SectionMap[Section].push_back(SymbolCU(NULL, Sym));
}
}
@@ -1152,6 +1156,8 @@ void DwarfDebug::endModule() {
finalizeModuleInfo();
if (!useSplitDwarf()) {
+ emitDebugStr();
+
// Emit all the DIEs into a debug info section.
emitDebugInfo();
@@ -1170,15 +1176,12 @@ void DwarfDebug::endModule() {
// Emit info into a debug macinfo section.
emitDebugMacInfo();
- // Emit inline info.
- // TODO: When we don't need the option anymore we
- // can remove all of the code that this section
- // depends upon.
- if (useDarwinGDBCompat())
- emitDebugInlineInfo();
} else {
// TODO: Fill this in for separated debug sections and separate
// out information into new sections.
+ emitDebugStr();
+ if (useSplitDwarf())
+ emitDebugStrDWO();
// Emit the debug info section and compile units.
emitDebugInfo();
@@ -1203,12 +1206,6 @@ void DwarfDebug::endModule() {
// Emit DWO addresses.
InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
- // Emit inline info.
- // TODO: When we don't need the option anymore we
- // can remove all of the code that this section
- // depends upon.
- if (useDarwinGDBCompat())
- emitDebugInlineInfo();
}
// Emit info into the dwarf accelerator table sections.
@@ -1219,20 +1216,11 @@ void DwarfDebug::endModule() {
emitAccelTypes();
}
- // Emit info into a debug pubnames section, if requested.
- if (GenerateDwarfPubNamesSection)
- emitDebugPubnames();
-
- // Emit info into a debug pubtypes section.
- // TODO: When we don't need the option anymore we can
- // remove all of the code that adds to the table.
- if (useDarwinGDBCompat())
- emitDebugPubTypes();
-
- // Finally emit string information into a string table.
- emitDebugStr();
- if (useSplitDwarf())
- emitDebugStrDWO();
+ // Emit the pubnames and pubtypes sections if requested.
+ if (HasDwarfPubSections) {
+ emitDebugPubNames(GenerateGnuPubSections);
+ emitDebugPubTypes(GenerateGnuPubSections);
+ }
// clean up.
SPMap.clear();
@@ -1262,7 +1250,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
if (!Scope)
return NULL;
- AbsDbgVariable = new DbgVariable(Var, NULL);
+ AbsDbgVariable = new DbgVariable(Var, NULL, this);
addScopeVariable(Scope, AbsDbgVariable);
AbstractVariables[Var] = AbsDbgVariable;
return AbsDbgVariable;
@@ -1311,7 +1299,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
continue;
DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
- DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable);
+ DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this);
RegVar->setFrameIndex(VP.first);
if (!addCurrentFnArgument(MF, RegVar, Scope))
addScopeVariable(Scope, RegVar);
@@ -1396,7 +1384,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
Processed.insert(DV);
assert(MInsn->isDebugValue() && "History must begin with debug value");
DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
- DbgVariable *RegVar = new DbgVariable(DV, AbsVar);
+ DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
if (!addCurrentFnArgument(MF, RegVar, Scope))
addScopeVariable(Scope, RegVar);
if (AbsVar)
@@ -1459,7 +1447,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
if (!DV || !DV.isVariable() || !Processed.insert(DV))
continue;
if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
- addScopeVariable(Scope, new DbgVariable(DV, NULL));
+ addScopeVariable(Scope, new DbgVariable(DV, NULL, this));
}
}
@@ -1602,36 +1590,45 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
// Gather pre-function debug information. Assumes being called immediately
// after the function entry point has been emitted.
void DwarfDebug::beginFunction(const MachineFunction *MF) {
- if (!MMI->hasDebugInfo()) return;
+
+ // If there's no debug info for the function, we're not going to do anything.
+ if (!MMI->hasDebugInfo())
+ return;
+
+ // Grab the lexical scopes for the function; if we don't have any,
+ // there's nothing we can do.
LScopes.initialize(*MF);
- if (LScopes.empty()) return;
+ if (LScopes.empty())
+ return;
+
+ assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+
+ // Make sure that each lexical scope will have a begin/end label.
identifyScopeMarkers();
// Set DwarfCompileUnitID in MCContext to the Compile Unit this function
- // belongs to.
+ // belongs to, so that we add to the correct per-CU line table in the
+ // non-asm case.
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
- if (Asm->TM.hasMCUseLoc() &&
- Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+ if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport())
// Use a single line table if we are using .loc and generating assembly.
Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
else
Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
- FunctionBeginSym = Asm->GetTempSymbol("func_begin",
- Asm->getFunctionNumber());
+ // Emit a label for the function so that we have a beginning address.
+ FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionBeginSym);
- assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
-
const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
// LiveUserVar - Map physreg numbers to the MDNode they contain.
- std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
+ std::vector<const MDNode *> LiveUserVar(TRI->getNumRegs());
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
- I != E; ++I) {
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
bool AtBlockEntry = true;
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
@@ -1642,22 +1639,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Keep track of user variables.
const MDNode *Var =
- MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+ MI->getOperand(MI->getNumOperands() - 1).getMetadata();
// Variable is in a register, we need to check for clobbers.
if (isDbgValueInDefinedReg(MI))
LiveUserVar[MI->getOperand(0).getReg()] = Var;
// Check the history of this variable.
- SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+ SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var];
if (History.empty()) {
UserVariables.push_back(Var);
// The first mention of a function argument gets the FunctionBeginSym
// label, so arguments are visible when breaking at function entry.
DIVariable DV(Var);
if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
- DISubprogram(getDISubprogram(DV.getContext()))
- .describes(MF->getFunction()))
+ getDISubprogram(DV.getContext()).describes(MF->getFunction()))
LabelsBeforeInsn[MI] = FunctionBeginSym;
} else {
// We have seen this variable before. Try to coalesce DBG_VALUEs.
@@ -1667,8 +1663,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (History.size() >= 2 &&
Prev->isIdenticalTo(History[History.size() - 2])) {
DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
- << "\t" << *Prev
- << "\t" << *History[History.size() - 2] << "\n");
+ << "\t" << *Prev << "\t"
+ << *History[History.size() - 2] << "\n");
History.pop_back();
}
@@ -1679,11 +1675,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Previous register assignment needs to terminate at the end of
// its basic block.
MachineBasicBlock::const_iterator LastMI =
- PrevMBB->getLastNonDebugInstr();
+ PrevMBB->getLastNonDebugInstr();
if (LastMI == PrevMBB->end()) {
// Drop DBG_VALUE for empty range.
DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n"
- << "\t" << *Prev << "\n");
+ << "\t" << *Prev << "\n");
History.pop_back();
} else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end())
// Terminate after LastMI.
@@ -1705,11 +1701,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Check if the instruction clobbers any registers with debug vars.
for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
continue;
- for (MCRegAliasIterator AI(MOI->getReg(), TRI, true);
- AI.isValid(); ++AI) {
+ for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); AI.isValid();
+ ++AI) {
unsigned Reg = *AI;
const MDNode *Var = LiveUserVar[Reg];
if (!Var)
@@ -1721,7 +1718,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
if (HistI == DbgValues.end())
continue;
- SmallVectorImpl<const MachineInstr*> &History = HistI->second;
+ SmallVectorImpl<const MachineInstr *> &History = HistI->second;
if (History.empty())
continue;
const MachineInstr *Prev = History.back();
@@ -1743,7 +1740,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
I != E; ++I) {
- SmallVectorImpl<const MachineInstr*> &History = I->second;
+ SmallVectorImpl<const MachineInstr *> &History = I->second;
if (History.empty())
continue;
@@ -1752,7 +1749,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
const MachineBasicBlock *PrevMBB = Prev->getParent();
MachineBasicBlock::const_iterator LastMI =
- PrevMBB->getLastNonDebugInstr();
+ PrevMBB->getLastNonDebugInstr();
if (LastMI == PrevMBB->end())
// Drop DBG_VALUE for empty range.
History.pop_back();
@@ -1776,13 +1773,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Record beginning of function.
if (!PrologEndLoc.isUnknown()) {
- DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc,
- MF->getFunction()->getContext());
- recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
- FnStartDL.getScope(MF->getFunction()->getContext()),
- // We'd like to list the prologue as "not statements" but GDB behaves
- // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
- DWARF2_FLAG_IS_STMT);
+ DebugLoc FnStartDL =
+ getFnDebugLoc(PrologEndLoc, MF->getFunction()->getContext());
+ recordSourceLine(
+ FnStartDL.getLine(), FnStartDL.getCol(),
+ FnStartDL.getScope(MF->getFunction()->getContext()),
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ DWARF2_FLAG_IS_STMT);
}
}
@@ -1855,7 +1853,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
if (AbstractVariables.lookup(CleanDV))
continue;
if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
- addScopeVariable(Scope, new DbgVariable(DV, NULL));
+ addScopeVariable(Scope, new DbgVariable(DV, NULL, this));
}
}
if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
@@ -1924,7 +1922,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
// Emit Methods
//===----------------------------------------------------------------------===//
-// Compute the size and offset of a DIE.
+// Compute the size and offset of a DIE. The offset is relative to the start of
+// the CU. It returns the offset after laying out the DIE.
unsigned
DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Get the children.
@@ -1935,7 +1934,7 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Get the abbreviation for this DIE.
unsigned AbbrevNumber = Die->getAbbrevNumber();
- const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1);
+ const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
// Set DIE offset
Die->setOffset(Offset);
@@ -1967,19 +1966,23 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
return Offset;
}
-// Compute the size and offset of all the DIEs.
+// Compute the size and offset for each DIE.
void DwarfUnits::computeSizeAndOffsets() {
- // Offset from the beginning of debug info section.
+ // Offset from the first CU in the debug info section is 0 initially.
unsigned SecOffset = 0;
+
+ // Iterate over each compile unit and set the size and offsets for each
+ // DIE within each compile unit. All offsets are CU relative.
for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
E = CUs.end(); I != E; ++I) {
(*I)->setDebugInfoOffset(SecOffset);
- unsigned Offset =
- sizeof(int32_t) + // Length of Compilation Unit Info
- sizeof(int16_t) + // DWARF version number
- sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t); // Pointer Size (in bytes)
+ // The CU-relative offset restarts here; the first DIE comes after the
+ // length field and the unit-specific header.
+ unsigned Offset = sizeof(int32_t) + // Length of Unit Info
+ (*I)->getHeaderSize(); // Unit-specific headers
+
+ // EndOffset here is CU-relative, after laying out
+ // all of the CU's DIEs.
unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
SecOffset += EndOffset;
}
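As a worked illustration of how the two offsets interact (hypothetical numbers, not taken from this patch): for a DWARF32 compile unit the unit-specific header is 2 (version) + 4 (abbrev offset) + 1 (address size) = 7 bytes, so the first DIE of every CU sits at CU-relative offset 4 + 7 = 11, and the section placement accumulates like this:

  SecOffset = 0            // CU0's DIEs start at the beginning of .debug_info
  CU0: Offset    = 4 + getHeaderSize() = 11
       EndOffset = 0x80    // say CU0's DIEs lay out to 0x80 bytes total
       SecOffset += 0x80
  CU1: setDebugInfoOffset(0x80); its first DIE is again at CU-relative 11,
       i.e. at section offset 0x80 + 11 = 0x8B.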
@@ -2006,9 +2009,16 @@ void DwarfDebug::emitSectionLabels() {
DwarfLineSectionSym =
emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
emitSectionSym(Asm, TLOF.getDwarfLocSection());
- if (GenerateDwarfPubNamesSection)
+ if (GenerateGnuPubSections) {
+ DwarfGnuPubNamesSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection());
+ DwarfGnuPubTypesSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection());
+ } else if (HasDwarfPubSections) {
emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
- emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ }
+
DwarfStrSectionSym =
emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
if (useSplitDwarf()) {
@@ -2028,10 +2038,10 @@ void DwarfDebug::emitSectionLabels() {
}
// Recursively emits a debug information entry.
-void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
+void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) {
// Get the abbreviation for this DIE.
unsigned AbbrevNumber = Die->getAbbrevNumber();
- const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1);
+ const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1];
// Emit the code (index) for the abbreviation.
if (Asm->isVerbose())
@@ -2046,27 +2056,44 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
// Emit the DIE attribute values.
for (unsigned i = 0, N = Values.size(); i < N; ++i) {
- unsigned Attr = AbbrevData[i].getAttribute();
- unsigned Form = AbbrevData[i].getForm();
+ dwarf::Attribute Attr = AbbrevData[i].getAttribute();
+ dwarf::Form Form = AbbrevData[i].getForm();
assert(Form && "Too many attributes for DIE (check abbreviation)");
if (Asm->isVerbose())
Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
switch (Attr) {
- case dwarf::DW_AT_abstract_origin: {
+ case dwarf::DW_AT_abstract_origin:
+ case dwarf::DW_AT_type:
+ case dwarf::DW_AT_friend:
+ case dwarf::DW_AT_specification:
+ case dwarf::DW_AT_import:
+ case dwarf::DW_AT_containing_type: {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
unsigned Addr = Origin->getOffset();
if (Form == dwarf::DW_FORM_ref_addr) {
+ assert(!useSplitDwarf() && "TODO: dwo files can't have relocations.");
// For DW_FORM_ref_addr, output the offset from beginning of debug info
// section. Origin->getOffset() returns the offset from start of the
// compile unit.
- DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- Addr += Holder.getCUOffset(Origin->getCompileUnit());
+ CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit());
+ assert(CU && "CUDie should belong to a CU.");
+ Addr += CU->getDebugInfoOffset();
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr,
+ DIEEntry::getRefAddrSize(Asm));
+ else
+ Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr,
+ DwarfInfoSectionSym,
+ DIEEntry::getRefAddrSize(Asm));
+ } else {
+ // Make sure Origin belongs to the same CU.
+ assert(Die->getCompileUnit() == Origin->getCompileUnit() &&
+ "The referenced DIE should belong to the same CU in ref4");
+ Asm->EmitInt32(Addr);
}
- Asm->OutStreamer.EmitIntValue(Addr,
- Form == dwarf::DW_FORM_ref_addr ? DIEEntry::getRefAddrSize(Asm) : 4);
break;
}
case dwarf::DW_AT_ranges: {
@@ -2088,7 +2115,7 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
case dwarf::DW_AT_location: {
if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- Asm->EmitLabelReference(L->getValue(), 4);
+ Asm->EmitSectionOffset(L->getValue(), DwarfDebugLocSectionSym);
else
Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
} else {
@@ -2142,20 +2169,10 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
TheCU->getUniqueID()));
// Emit size of content not including length itself
- unsigned ContentSize = Die->getSize() +
- sizeof(int16_t) + // DWARF version number
- sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t); // Pointer Size (in bytes)
+ Asm->OutStreamer.AddComment("Length of Unit");
+ Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize());
- Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
- Asm->EmitInt32(ContentSize);
- Asm->OutStreamer.AddComment("DWARF version number");
- Asm->EmitInt16(DD->getDwarfVersion());
- Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
- Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()),
- ASectionSym);
- Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ TheCU->emitHeader(ASection, ASectionSym);
DD->emitDIE(Die, Abbreviations);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(),
@@ -2163,19 +2180,6 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
}
}
-/// For a given compile unit DIE, returns offset from beginning of debug info.
-unsigned DwarfUnits::getCUOffset(DIE *Die) {
- assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
- "Input DIE should be compile unit in getCUOffset.");
- for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
- E = CUs.end(); I != E; ++I) {
- CompileUnit *TheCU = *I;
- if (TheCU->getCUDie() == Die)
- return TheCU->getDebugInfoOffset();
- }
- llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
-}
-
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -2249,7 +2253,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
- DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4));
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
@@ -2278,7 +2282,7 @@ void DwarfDebug::emitAccelNames() {
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
- DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4));
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
@@ -2306,7 +2310,7 @@ void DwarfDebug::emitAccelObjC() {
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
- DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4));
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
@@ -2335,11 +2339,11 @@ void DwarfDebug::emitAccelNamespaces() {
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
std::vector<DwarfAccelTable::Atom> Atoms;
- Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4));
- Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag,
+ Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag,
dwarf::DW_FORM_data2));
- Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags,
+ Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags,
dwarf::DW_FORM_data1));
DwarfAccelTable AT(Atoms);
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
@@ -2367,23 +2371,85 @@ void DwarfDebug::emitAccelTypes() {
AT.Emit(Asm, SectionBegin, &InfoHolder);
}
-/// emitDebugPubnames - Emit visible names into a debug pubnames section.
+// Public name handling.
+// The format for the various pubnames:
+//
+// dwarf pubnames - offset/name pairs where the offset is the offset into the CU
+// for the DIE that is named.
+//
+// gnu pubnames - offset/index value/name tuples where the offset is the offset
+// into the CU and the index value is computed according to the type of value
+// for the DIE that is named.
+//
+// For type units the offset is the offset of the skeleton DIE. For split dwarf
+// it's the offset within the debug_info/debug_types dwo section; however, the
+// reference in the pubnames header doesn't change.
+
+/// computeIndexValue - Compute the gdb index value for the DIE and CU.
+static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU,
+ DIE *Die) {
+ dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
+
+ // We could have a specification DIE that has most of our knowledge;
+ // look for that now.
+ DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification);
+ if (SpecVal) {
+ DIE *SpecDIE = cast<DIEEntry>(SpecVal)->getEntry();
+ if (SpecDIE->findAttribute(dwarf::DW_AT_external))
+ Linkage = dwarf::GIEL_EXTERNAL;
+ } else if (Die->findAttribute(dwarf::DW_AT_external))
+ Linkage = dwarf::GIEL_EXTERNAL;
+
+ switch (Die->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ return dwarf::PubIndexEntryDescriptor(
+ dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus
+ ? dwarf::GIEL_STATIC
+ : dwarf::GIEL_EXTERNAL);
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_subrange_type:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC);
+ case dwarf::DW_TAG_namespace:
+ return dwarf::GIEK_TYPE;
+ case dwarf::DW_TAG_subprogram:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
+ case dwarf::DW_TAG_constant:
+ case dwarf::DW_TAG_variable:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
+ case dwarf::DW_TAG_enumerator:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE,
+ dwarf::GIEL_STATIC);
+ default:
+ return dwarf::GIEK_NONE;
+ }
+}
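A short usage sketch of the classification above (the two DIEs are hypothetical and shown only to illustrate the mapping; the one-byte descriptor itself is produced by toBits() in emitDebugPubNames/emitDebugPubTypes below):

  // DIE *FnDie;  // a DW_TAG_subprogram carrying DW_AT_external
  //              // (directly or via its DW_AT_specification)
  // DIE *VarDie; // a file-static DW_TAG_variable with no DW_AT_external
  // computeIndexValue(CU, FnDie);  // -> {GIEK_FUNCTION, GIEL_EXTERNAL}
  // computeIndexValue(CU, VarDie); // -> {GIEK_VARIABLE, GIEL_STATIC}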
+
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
///
-void DwarfDebug::emitDebugPubnames() {
+void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+ const MCSection *PSec =
+ GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
+ : Asm->getObjFileLowering().getDwarfPubNamesSection();
typedef DenseMap<const MDNode*, CompileUnit*> CUMapType;
for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
unsigned ID = TheCU->getUniqueID();
- if (TheCU->getGlobalNames().empty())
- continue;
-
// Start the dwarf pubnames section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubNamesSection());
+ Asm->OutStreamer.SwitchSection(PSec);
+
+ // Emit a label so we can reference the beginning of this pubnames section.
+ if (GnuStyle)
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames",
+ TheCU->getUniqueID()));
+ // Emit the header.
Asm->OutStreamer.AddComment("Length of Public Names Info");
Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
Asm->GetTempSymbol("pubnames_begin", ID), 4);
@@ -2391,7 +2457,7 @@ void DwarfDebug::emitDebugPubnames() {
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(DwarfVersion);
+ Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
@@ -2402,15 +2468,24 @@ void DwarfDebug::emitDebugPubnames() {
Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
4);
+ // Emit the pubnames for this compilation unit.
const StringMap<DIE*> &Globals = TheCU->getGlobalNames();
for (StringMap<DIE*>::const_iterator
GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
- const DIE *Entity = GI->second;
+ DIE *Entity = GI->second;
Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
+ if (GnuStyle) {
+ dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity);
+ Asm->OutStreamer.AddComment(
+ Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
+ dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+ Asm->EmitInt8(Desc.toBits());
+ }
+
if (Asm->isVerbose())
Asm->OutStreamer.AddComment("External Name");
Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
@@ -2422,55 +2497,78 @@ void DwarfDebug::emitDebugPubnames() {
}
}
-void DwarfDebug::emitDebugPubTypes() {
+void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+ const MCSection *PSec =
+ GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
+ : Asm->getObjFileLowering().getDwarfPubTypesSection();
+
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
- E = CUMap.end(); I != E; ++I) {
+ E = CUMap.end();
+ I != E; ++I) {
CompileUnit *TheCU = I->second;
// Start the dwarf pubtypes section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubTypesSection());
+ Asm->OutStreamer.SwitchSection(PSec);
+
+ // Emit a label so we can reference the beginning of this pubtypes section.
+ if (GnuStyle)
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes",
+ TheCU->getUniqueID()));
+
+ // Emit the header.
Asm->OutStreamer.AddComment("Length of Public Types Info");
Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
- Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
- TheCU->getUniqueID()));
+ Asm->OutStreamer.EmitLabel(
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()));
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(DwarfVersion);
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION);
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
- Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(),
- TheCU->getUniqueID()),
- DwarfInfoSectionSym);
+ Asm->EmitSectionOffset(
+ Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()),
+ DwarfInfoSectionSym);
Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(),
- TheCU->getUniqueID()),
- Asm->GetTempSymbol(ISec->getLabelBeginName(),
- TheCU->getUniqueID()),
- 4);
-
- const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
- for (StringMap<DIE*>::const_iterator
- GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4);
+
+ // Emit the pubtypes.
+ const StringMap<DIE *> &Globals = TheCU->getGlobalTypes();
+ for (StringMap<DIE *>::const_iterator GI = Globals.begin(),
+ GE = Globals.end();
+ GI != GE; ++GI) {
const char *Name = GI->getKeyData();
DIE *Entity = GI->second;
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ if (GnuStyle) {
+ dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity);
+ Asm->OutStreamer.AddComment(
+ Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
+ dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+ Asm->EmitInt8(Desc.toBits());
+ }
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+
// Emit the name with a terminating null byte.
- Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1));
}
Asm->OutStreamer.AddComment("End Mark");
Asm->EmitInt32(0);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
- TheCU->getUniqueID()));
+ Asm->OutStreamer.EmitLabel(
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()));
}
}
@@ -2649,18 +2747,178 @@ void DwarfDebug::emitDebugLoc() {
}
}
-// Emit visible names into a debug aranges section.
+struct SymbolCUSorter {
+ SymbolCUSorter(const MCStreamer &s) : Streamer(s) {}
+ const MCStreamer &Streamer;
+
+ bool operator() (const SymbolCU &A, const SymbolCU &B) {
+ unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0;
+
+ // Symbols with no order assigned should be placed at the end.
+ // (e.g. section end labels)
+ if (IA == 0)
+ IA = (unsigned)(-1);
+ if (IB == 0)
+ IB = (unsigned)(-1);
+ return IA < IB;
+ }
+};
+
+static bool CUSort(const CompileUnit *A, const CompileUnit *B) {
+ return (A->getUniqueID() < B->getUniqueID());
+}
+
+struct ArangeSpan {
+ const MCSymbol *Start, *End;
+};
+
+// Emit a debug aranges section, containing a CU lookup for any
+// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
// Start the dwarf aranges section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfARangesSection());
+ Asm->OutStreamer
+ .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection());
+
+ typedef DenseMap<CompileUnit *, std::vector<ArangeSpan> > SpansType;
+
+ SpansType Spans;
+
+ // Build a list of sections used.
+ std::vector<const MCSection *> Sections;
+ for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end();
+ it++) {
+ const MCSection *Section = it->first;
+ Sections.push_back(Section);
+ }
+
+ // Sort the sections into order.
+ // This is only done to ensure consistent output order across different runs.
+ std::sort(Sections.begin(), Sections.end(), SectionSort);
+
+ // Build a set of address spans, sorted by CU.
+ for (size_t SecIdx = 0; SecIdx < Sections.size(); SecIdx++) {
+ const MCSection *Section = Sections[SecIdx];
+ SmallVector<SymbolCU, 8> &List = SectionMap[Section];
+ if (List.size() < 2)
+ continue;
+
+ // Sort the symbols by offset within the section.
+ SymbolCUSorter sorter(Asm->OutStreamer);
+ std::sort(List.begin(), List.end(), sorter);
+
+ // If we have no section (e.g. common), just write out
+ // individual spans for each symbol.
+ if (Section == NULL) {
+ for (size_t n = 0; n < List.size(); n++) {
+ const SymbolCU &Cur = List[n];
+
+ ArangeSpan Span;
+ Span.Start = Cur.Sym;
+ Span.End = NULL;
+ if (Cur.CU)
+ Spans[Cur.CU].push_back(Span);
+ }
+ } else {
+ // Build spans between each label.
+ const MCSymbol *StartSym = List[0].Sym;
+ for (size_t n = 1; n < List.size(); n++) {
+ const SymbolCU &Prev = List[n - 1];
+ const SymbolCU &Cur = List[n];
+
+ // Try to build the longest span we can within the same CU.
+ if (Cur.CU != Prev.CU) {
+ ArangeSpan Span;
+ Span.Start = StartSym;
+ Span.End = Cur.Sym;
+ Spans[Prev.CU].push_back(Span);
+ StartSym = Cur.Sym;
+ }
+ }
+ }
+ }
+
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+ unsigned PtrSize = Asm->getDataLayout().getPointerSize();
+
+ // Build a list of CUs used.
+ std::vector<CompileUnit *> CUs;
+ for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) {
+ CompileUnit *CU = it->first;
+ CUs.push_back(CU);
+ }
+
+ // Sort the CU list (again, to ensure consistent output order).
+ std::sort(CUs.begin(), CUs.end(), CUSort);
+
+ // Emit an arange table for each CU we used.
+ for (size_t CUIdx = 0; CUIdx < CUs.size(); CUIdx++) {
+ CompileUnit *CU = CUs[CUIdx];
+ std::vector<ArangeSpan> &List = Spans[CU];
+
+ // Emit size of content not including length itself.
+ unsigned ContentSize
+ = sizeof(int16_t) // DWARF ARange version number
+ + sizeof(int32_t) // Offset of CU in the .debug_info section
+ + sizeof(int8_t) // Pointer Size (in bytes)
+ + sizeof(int8_t); // Segment Size (in bytes)
+
+ unsigned TupleSize = PtrSize * 2;
+
+ // Section 7.20 of the DWARF spec requires the table to be aligned on a
+ // tuple boundary.
+ unsigned Padding = 0;
+ while (((sizeof(int32_t) + ContentSize + Padding) % TupleSize) != 0)
+ Padding++;
+
+ ContentSize += Padding;
+ ContentSize += (List.size() + 1) * TupleSize;
+
+ // For each compile unit, write the list of spans it covers.
+ Asm->OutStreamer.AddComment("Length of ARange Set");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer.AddComment("DWARF Arange version number");
+ Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
+ Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
+ Asm->EmitSectionOffset(
+ Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()),
+ DwarfInfoSectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(PtrSize);
+ Asm->OutStreamer.AddComment("Segment Size (in bytes)");
+ Asm->EmitInt8(0);
+
+ for (unsigned n = 0; n < Padding; n++)
+ Asm->EmitInt8(0xff);
+
+ for (unsigned n = 0; n < List.size(); n++) {
+ const ArangeSpan &Span = List[n];
+ Asm->EmitLabelReference(Span.Start, PtrSize);
+
+ // Calculate the size as the distance from the span start to its end.
+ if (Span.End) {
+ Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize);
+ } else {
+ // For symbols without an end marker (e.g. common), we
+ // write a single arange entry containing just that one symbol.
+ uint64_t Size = SymSize[Span.Start];
+ if (Size == 0)
+ Size = 1;
+
+ Asm->OutStreamer.EmitIntValue(Size, PtrSize);
+ }
+ }
+
+ Asm->OutStreamer.AddComment("ARange terminator");
+ Asm->OutStreamer.EmitIntValue(0, PtrSize);
+ Asm->OutStreamer.EmitIntValue(0, PtrSize);
+ }
}
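To make the alignment arithmetic above concrete (hypothetical values, not from this patch): with an 8-byte pointer, TupleSize is 16, and the bytes in front of the tuples are 4 (length) + 2 (version) + 4 (debug_info offset) + 1 (address size) + 1 (segment size) = 12, so the loop settles on Padding = 4. For, say, two spans the emitted length field works out as:

  ContentSize = 8            // version + offset + addr size + seg size
              + 4            // padding so the tuples start 16-byte aligned
              + (2 + 1) * 16 // two spans plus the terminating zero tuple
              = 60           // total table size on disk: 4 + 60 = 64 bytes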
// Emit visible names into a debug ranges section.
void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfRangesSection());
+ Asm->OutStreamer
+ .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection());
unsigned char Size = Asm->getDataLayout().getPointerSize();
for (SmallVectorImpl<const MCSymbol *>::iterator
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
@@ -2681,103 +2939,19 @@ void DwarfDebug::emitDebugMacInfo() {
}
}
-// Emit inline info using following format.
-// Section Header:
-// 1. length of section
-// 2. Dwarf version number
-// 3. address size.
-//
-// Entries (one "entry" for each function that was inlined):
-//
-// 1. offset into __debug_str section for MIPS linkage name, if exists;
-// otherwise offset into __debug_str for regular function name.
-// 2. offset into __debug_str section for regular function name.
-// 3. an unsigned LEB128 number indicating the number of distinct inlining
-// instances for the function.
-//
-// The rest of the entry consists of a {die_offset, low_pc} pair for each
-// inlined instance; the die_offset points to the inlined_subroutine die in the
-// __debug_info section, and the low_pc is the starting address for the
-// inlining instance.
-void DwarfDebug::emitDebugInlineInfo() {
- if (!Asm->MAI->doesDwarfUseInlineInfoSection())
- return;
-
- if (!FirstCU)
- return;
-
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfDebugInlineSection());
-
- Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
- Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
-
- Asm->OutStreamer.AddComment("Dwarf Version");
- Asm->EmitInt16(DwarfVersion);
- Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
-
- for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
- E = InlinedSPNodes.end(); I != E; ++I) {
-
- const MDNode *Node = *I;
- InlineInfoMap::iterator II = InlineInfo.find(Node);
- SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
- DISubprogram SP(Node);
- StringRef LName = SP.getLinkageName();
- StringRef Name = SP.getName();
-
- Asm->OutStreamer.AddComment("MIPS linkage name");
- if (LName.empty())
- Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
- DwarfStrSectionSym);
- else
- Asm->EmitSectionOffset(
- InfoHolder.getStringPoolEntry(Function::getRealLinkageName(LName)),
- DwarfStrSectionSym);
-
- Asm->OutStreamer.AddComment("Function name");
- Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
- DwarfStrSectionSym);
- Asm->EmitULEB128(Labels.size(), "Inline count");
-
- for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
- LE = Labels.end(); LI != LE; ++LI) {
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
- Asm->EmitInt32(LI->second->getOffset());
-
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
- Asm->OutStreamer.EmitSymbolValue(LI->first,
- Asm->getDataLayout().getPointerSize());
- }
- }
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
-}
-
// DWARF5 Experimental Separate Dwarf emitters.
// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
-// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present,
-// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
-CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
- DICompileUnit DIUnit(N);
- CompilationDir = DIUnit.getDirectory();
+// DW_AT_ranges_base, DW_AT_addr_base.
+CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) {
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
- DIUnit.getLanguage(), Die, N, Asm,
- this, &SkeletonHolder);
+ CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(),
+ Asm, this, &SkeletonHolder);
NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
- DIUnit.getSplitDebugFilename());
-
- // This should be a unique identifier when we want to build .dwp files.
- NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+ CU->getNode().getSplitDebugFilename());
// Relocate to the beginning of the addr_base section, else 0 for the
// beginning of the one for this compile unit.
@@ -2804,6 +2978,35 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
if (!CompilationDir.empty())
NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+ // Flags to let the linker know we have emitted new-style pubnames.
+ if (GenerateGnuPubSections) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_sec_offset,
+ Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()));
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()),
+ DwarfGnuPubNamesSectionSym);
+
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_sec_offset,
+ Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()));
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()),
+ DwarfGnuPubTypesSectionSym);
+ }
+
+ // If any ranges have been emitted, record the base of the ranges section
+ // for this compile unit.
+ if (DebugRangeSymbols.size()) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_GNU_ranges_base,
+ dwarf::DW_FORM_sec_offset, DwarfDebugRangeSectionSym);
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4,
+ 0);
+ }
+
SkeletonHolder.addUnit(NewCU);
SkeletonCUs.push_back(NewCU);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index e14f9b1..cebac39 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -150,11 +150,12 @@ class DbgVariable {
DbgVariable *AbsVar; // Corresponding Abstract variable, if any.
const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
int FrameIndex;
+ DwarfDebug *DD;
public:
// AbsVar may be NULL.
- DbgVariable(DIVariable V, DbgVariable *AV)
+ DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD)
: Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
- FrameIndex(~0) {}
+ FrameIndex(~0), DD(DD) {}
// Accessors.
DIVariable getVariable() const { return Var; }
@@ -169,7 +170,7 @@ public:
int getFrameIndex() const { return FrameIndex; }
void setFrameIndex(int FI) { FrameIndex = FI; }
// Translate tag to proper Dwarf tag.
- unsigned getTag() const {
+ uint16_t getTag() const {
if (Var.getTag() == dwarf::DW_TAG_arg_variable)
return dwarf::DW_TAG_formal_parameter;
@@ -208,6 +209,11 @@ public:
return Var.getAddrElement(i);
}
DIType getType() const;
+
+private:
+ /// resolve - Look in the DwarfDebug map for the MDNode that
+ /// corresponds to the reference.
+ template <typename T> T resolve(DIRef<T> Ref) const;
};
/// \brief Collects and handles information specific to a particular
@@ -220,7 +226,7 @@ class DwarfUnits {
FoldingSet<DIEAbbrev> *AbbreviationsSet;
// A list of all the unique abbreviations in use.
- std::vector<DIEAbbrev *> *Abbreviations;
+ std::vector<DIEAbbrev *> &Abbreviations;
// A pointer to all units in the section.
SmallVector<CompileUnit *, 1> CUs;
@@ -243,7 +249,7 @@ class DwarfUnits {
public:
DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
- std::vector<DIEAbbrev *> *A, const char *Pref,
+ std::vector<DIEAbbrev *> &A, const char *Pref,
BumpPtrAllocator &DA)
: Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA),
NextStringPoolNumber(0), StringPref(Pref), AddressPool(),
@@ -294,10 +300,13 @@ public:
/// \brief Returns the address pool.
AddrPool *getAddrPool() { return &AddressPool; }
+};
- /// \brief for a given compile unit DIE, returns offset from beginning of
- /// debug info.
- unsigned getCUOffset(DIE *Die);
+/// \brief Helper used to pair up a symbol and its DWARF compile unit.
+struct SymbolCU {
+ SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
+ const MCSymbol *Sym;
+ CompileUnit *CU;
};
/// \brief Collects and handles dwarf debug information.
@@ -320,6 +329,14 @@ class DwarfDebug {
// Maps subprogram MDNode with its corresponding CompileUnit.
DenseMap <const MDNode *, CompileUnit *> SPMap;
+ // Maps a CU DIE with its corresponding CompileUnit.
+ DenseMap <const DIE *, CompileUnit *> CUDieMap;
+
+ /// Maps type-system MDNodes to their corresponding DIEs. These DIEs can
+ /// be shared across CUs, which is why we keep the map here instead
+ /// of in CompileUnit.
+ DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
+
// Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -332,8 +349,15 @@ class DwarfDebug {
// separated by a zero byte, mapped to a unique id.
StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
+ // List of all labels used in aranges generation.
+ std::vector<SymbolCU> ArangeLabels;
+
+ // Size of each symbol emitted (for those symbols that have a specific size).
+ DenseMap <const MCSymbol *, uint64_t> SymSize;
+
// Provides a unique id per text section.
- SetVector<const MCSection*> SectionMap;
+ typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType;
+ SectionMapType SectionMap;
// List of arguments for current function.
SmallVector<DbgVariable *, 8> CurrentFnArguments;
@@ -358,14 +382,6 @@ class DwarfDebug {
// as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
- // Keep track of inlined functions and their location. This
- // information is used to populate the debug_inlined section.
- typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
- typedef DenseMap<const MDNode *,
- SmallVector<InlineInfoLabels, 4> > InlineInfoMap;
- InlineInfoMap InlineInfo;
- SmallVector<const MDNode *, 4> InlinedSPNodes;
-
// This is a collection of subprogram MDNodes that are processed to
// create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
@@ -406,6 +422,7 @@ class DwarfDebug {
MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
+ MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym;
// As an optimization, there is no need to emit an entry in the directory
// table for the same directory as DW_AT_comp_dir.
@@ -420,9 +437,6 @@ class DwarfDebug {
// Holders for the various debug information flags that we might need to
// have exposed. See accessor functions below for description.
- // Whether or not we're emitting info for older versions of gdb on darwin.
- bool IsDarwinGDBCompat;
-
// Holder for imported entities.
typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
ImportedEntityMap;
@@ -431,12 +445,16 @@ class DwarfDebug {
// Holder for types that are going to be extracted out into a type unit.
std::vector<DIE *> TypeUnits;
+ // Whether to emit the pubnames/pubtypes sections.
+ bool HasDwarfPubSections;
+
+ // Version of dwarf we're emitting.
+ unsigned DwarfVersion;
+
// DWARF5 Experimental Options
bool HasDwarfAccelTables;
bool HasSplitDwarf;
- unsigned DwarfVersion;
-
// Separated Dwarf Variables
// In general these will all be for bits that are left in the
// original object file, rather than things that are meant
@@ -454,6 +472,9 @@ class DwarfDebug {
// Holder for the skeleton information.
DwarfUnits SkeletonHolder;
+ // Maps from a type identifier to the actual MDNode.
+ DITypeIdentifierMap TypeIdentifierMap;
+
private:
void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -465,11 +486,14 @@ private:
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
/// variables.
- DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
+ DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP);
/// \brief Construct new DW_TAG_lexical_block for this scope and
/// attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+ /// A helper function to check whether the DIE for a given Scope is going
+ /// to be null.
+ bool isLexicalScopeDIENull(LexicalScope *Scope);
/// \brief This scope represents inlined body of a function. Construct
/// DIE to represent this concrete inlined copy of the function.
@@ -477,6 +501,9 @@ private:
/// \brief Construct a DIE for this scope.
DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+ /// A helper function to create children of a Scope DIE.
+ DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope,
+ SmallVectorImpl<DIE*> &Children);
/// \brief Emit initial Dwarf sections with a label at the start of each one.
void emitSectionLabels();
@@ -528,10 +555,16 @@ private:
void emitAccelTypes();
/// \brief Emit visible names into a debug pubnames section.
- void emitDebugPubnames();
+ /// \param GnuStyle determines whether or not we want to emit
+ /// additional information into the table, as newer GCC does for the
+ /// gdb index.
+ void emitDebugPubNames(bool GnuStyle = false);
/// \brief Emit visible types into a debug pubtypes section.
- void emitDebugPubTypes();
+ /// \param GnuStyle determines whether or not we want to emit
+ /// additional information into the table, as newer GCC does for the
+ /// gdb index.
+ void emitDebugPubTypes(bool GnuStyle = false);
/// \brief Emit visible names into a debug str section.
void emitDebugStr();
@@ -555,7 +588,7 @@ private:
/// \brief Construct the split debug info compile unit for the debug info
/// section.
- CompileUnit *constructSkeletonCU(const MDNode *);
+ CompileUnit *constructSkeletonCU(const CompileUnit *CU);
/// \brief Emit the local split abbreviations.
void emitSkeletonAbbrevs(const MCSection *);
@@ -571,7 +604,7 @@ private:
/// \brief Create new CompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
- CompileUnit *constructCompileUnit(const MDNode *N);
+ CompileUnit *constructCompileUnit(DICompileUnit DIUnit);
/// \brief Construct subprogram DIE.
void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
@@ -633,7 +666,13 @@ public:
// Main entry points.
//
DwarfDebug(AsmPrinter *A, Module *M);
- ~DwarfDebug();
+
+ void insertDIE(const MDNode *TypeMD, DIE *Die) {
+ MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
+ }
+ DIE *getDIE(const MDNode *TypeMD) {
+ return MDTypeNodeToDieMap.lookup(TypeMD);
+ }
/// \brief Emit all Dwarf sections that should come prior to the
/// content.
@@ -658,6 +697,13 @@ public:
/// type units.
void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); }
+ /// \brief Add a label so that arange data can be generated for it.
+ void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
+
+ /// \brief For symbols that have a size designated (e.g. common symbols),
+ /// this tracks that size.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;}
+
/// \brief Look up the source id with the given directory and source file
/// names. If none currently exists, create a new id and insert it in the
/// SourceIds map.
@@ -665,11 +711,7 @@ public:
unsigned CUID);
/// \brief Recursively Emits a debug information entry.
- void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
-
- /// \brief Returns whether or not to limit some of our debug
- /// output to the limitations of darwin gdb.
- bool useDarwinGDBCompat() { return IsDarwinGDBCompat; }
+ void emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs);
// Experimental DWARF5 features.
@@ -683,6 +725,16 @@ public:
/// Returns the Dwarf Version.
unsigned getDwarfVersion() const { return DwarfVersion; }
+
+ /// Find the MDNode for the given reference.
+ template <typename T> T resolve(DIRef<T> Ref) const {
+ return Ref.resolve(TypeIdentifierMap);
+ }
+
+ /// isSubprogramContext - Return true if Context is either a subprogram
+ /// or another context nested inside a subprogram.
+ bool isSubprogramContext(const MDNode *Context);
+
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 49a85d8..1575161 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -29,6 +29,7 @@ class MCAsmInfo;
class MCExpr;
class MCSymbol;
class Function;
+class ARMTargetStreamer;
class AsmPrinter;
//===----------------------------------------------------------------------===//
@@ -177,6 +178,8 @@ public:
class ARMException : public DwarfException {
void EmitTypeInfos(unsigned TTypeEncoding);
+ ARMTargetStreamer &getTargetStreamer();
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index b48b817..24aa1ab 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -83,6 +83,8 @@ public:
virtual unsigned getJumpBufAlignment() const;
virtual unsigned getJumpBufSize() const;
virtual bool shouldBuildLookupTables() const;
+ virtual bool haveFastSqrt(Type *Ty) const;
+ virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
/// @}
@@ -111,6 +113,7 @@ public:
ArrayRef<Type*> Tys) const;
virtual unsigned getNumberOfParts(Type *Tp) const;
virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const;
+ virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) const;
/// @}
};
@@ -182,6 +185,14 @@ bool BasicTTI::shouldBuildLookupTables() const {
TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
+bool BasicTTI::haveFastSqrt(Type *Ty) const {
+ const TargetLoweringBase *TLI = getTLI();
+ EVT VT = TLI->getValueType(Ty);
+ return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
+}
+
+void BasicTTI::getUnrollingPreferences(Loop *, UnrollingPreferences &) const { }
+
//===----------------------------------------------------------------------===//
//
// Calls used by the vectorizers.
@@ -443,12 +454,14 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::log10: ISD = ISD::FLOG10; break;
case Intrinsic::log2: ISD = ISD::FLOG2; break;
case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break;
case Intrinsic::floor: ISD = ISD::FFLOOR; break;
case Intrinsic::ceil: ISD = ISD::FCEIL; break;
case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
case Intrinsic::nearbyint:
ISD = ISD::FNEARBYINT; break;
case Intrinsic::rint: ISD = ISD::FRINT; break;
+ case Intrinsic::round: ISD = ISD::FROUND; break;
case Intrinsic::pow: ISD = ISD::FPOW; break;
case Intrinsic::fma: ISD = ISD::FMA; break;
case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
@@ -498,3 +511,17 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return 0;
}
+
+unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwise) const {
+ assert(Ty->isVectorTy() && "Expect a vector type");
+ unsigned NumVecElts = Ty->getVectorNumElements();
+ unsigned NumReduxLevels = Log2_32(NumVecElts);
+ unsigned ArithCost = NumReduxLevels *
+ TopTTI->getArithmeticInstrCost(Opcode, Ty);
+ // Assume the pairwise shuffles add a cost.
+ unsigned ShuffleCost =
+ NumReduxLevels * (IsPairwise + 1) *
+ TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
+ return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
+}
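As a worked example of the formula above (per-operation costs are hypothetical): for an 8-element vector the reduction tree has log2(8) = 3 levels, and a pairwise reduction pays for two shuffles per level while a split reduction pays for one:

  NumVecElts  = 8              -> NumReduxLevels = log2(8) = 3
  ArithCost   = 3 * 1 = 3      // assuming unit cost per vector arithmetic op
  ShuffleCost = 3 * 1 * 1 = 3  // split reduction (IsPairwise = false)
              = 3 * 2 * 1 = 6  // pairwise reduction (IsPairwise = true)
  Total       = ArithCost + ShuffleCost + scalarization overhead
                (the cost of extracting the final scalar result)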
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 26bdca9..0d15ed7 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -1,4 +1,4 @@
-//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===//
+//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 56aa330..10cc9ff 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_library(LLVMCodeGen
LiveRangeCalc.cpp
LiveRangeEdit.cpp
LiveRegMatrix.cpp
+ LiveRegUnits.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
LocalStackSlotAllocation.cpp
@@ -88,7 +89,6 @@ add_llvm_library(LLVMCodeGen
ScheduleDAGPrinter.cpp
ScoreboardHazardRecognizer.cpp
ShadowStackGC.cpp
- ShrinkWrapping.cpp
SjLjEHPrepare.cpp
SlotIndexes.cpp
SpillPlacement.cpp
@@ -97,7 +97,7 @@ add_llvm_library(LLVMCodeGen
StackColoring.cpp
StackProtector.cpp
StackSlotColoring.cpp
- StrongPHIElimination.cpp
+ StackMaps.cpp
TailDuplication.cpp
TargetFrameLoweringImpl.cpp
TargetInstrInfo.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index b03c325..4925c4d 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -9,14 +9,12 @@
#define DEBUG_TYPE "calcspillweights"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -24,38 +22,22 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
-char CalculateSpillWeights::ID = 0;
-INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
- "Calculate spill weights", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
- "Calculate spill weights", false, false)
-
-void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<LiveIntervals>();
- au.addRequired<MachineBlockFrequencyInfo>();
- au.addRequired<MachineLoopInfo>();
- au.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(au);
-}
-
-bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) {
-
+void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
+ MachineFunction &MF,
+ const MachineLoopInfo &MLI,
+ const MachineBlockFrequencyInfo &MBFI,
+ VirtRegAuxInfo::NormalizingFn norm) {
DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
<< "********** Function: " << MF.getName() << '\n');
- LiveIntervals &LIS = getAnalysis<LiveIntervals>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>(),
- getAnalysis<MachineBlockFrequencyInfo>());
+ VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm);
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI.reg_nodbg_empty(Reg))
continue;
- VRAI.CalculateWeightAndHint(LIS.getInterval(Reg));
+ VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg));
}
- return false;
}
// Return the preferred allocation register for reg, given a COPY instruction.
@@ -111,7 +93,7 @@ static bool isRematerializable(const LiveInterval &LI,
}
void
-VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
MachineRegisterInfo &mri = MF.getRegInfo();
const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
MachineBasicBlock *mbb = 0;
@@ -201,5 +183,5 @@ VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo()))
totalWeight *= 0.5F;
- li.weight = normalizeSpillWeight(totalWeight, li.getSize());
+ li.weight = normalize(totalWeight, li.getSize());
}
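A hypothetical caller sketch (not part of this patch) for the new free function: a pass that previously relied on the removed CalculateSpillWeights pass could now compute the weights directly from its own analyses.

  // Inside some MachineFunctionPass::runOnMachineFunction(MachineFunction &MF):
  //   calculateSpillWeightsAndHints(getAnalysis<LiveIntervals>(), MF,
  //                                 getAnalysis<MachineLoopInfo>(),
  //                                 getAnalysis<MachineBlockFrequencyInfo>());
  // This assumes the declaration supplies a default for the `norm` argument;
  // otherwise pass a normalizing function such as normalizeSpillWeight.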
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index c641991..7430c53 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -22,7 +22,6 @@ using namespace llvm;
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
- initializeCalculateSpillWeightsPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeExpandPostRAPass(Registry);
@@ -60,7 +59,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeStackProtectorPass(Registry);
initializeStackColoringPass(Registry);
initializeStackSlotColoringPass(Registry);
- initializeStrongPHIEliminationPass(Registry);
initializeTailDuplicatePassPass(Registry);
initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 840a101..6619bcf 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -160,7 +160,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator EndItr) {
assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
VLIWScheduler->startBlock(MBB);
- VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr,
+ std::distance(BeginItr, EndItr));
VLIWScheduler->schedule();
// Generate MI -> SU map.
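// Editor's note, not part of the patch: enterRegion() expects the number of
// instructions in the half-open region [BeginItr, EndItr), which may be only
// part of the block, so MBB->size() can over-count. A self-contained sketch
// of the distinction, using plain pointers as stand-in iterators:
#include <cassert>
#include <iterator>

static void regionCountVersusBlockSize() {
  int Block[5] = {0, 1, 2, 3, 4};   // stands in for a basic block
  int *BeginItr = Block + 1;        // region starts after one instruction
  int *EndItr = Block + 5;          // region runs to the end of the block
  assert(std::distance(BeginItr, EndItr) == 4);   // the new argument
  assert(sizeof(Block) / sizeof(Block[0]) == 5);  // the old MBB->size() value
}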
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index e277f5c..031f19c 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -23,6 +23,7 @@
#define DEBUG_TYPE "execution-fix"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Allocator.h"
@@ -136,6 +137,12 @@ class ExeDepsFix : public MachineFunctionPass {
typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
LiveOutMap LiveOuts;
+ /// List of undefined register reads in this block in forward order.
+ std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
+
+ /// Storage for register unit liveness.
+ LiveRegUnits LiveUnits;
+
/// Current instruction number.
/// The first instruction in each basic block is 0.
int CurInstr;
@@ -185,6 +192,8 @@ private:
void processDefs(MachineInstr*, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
+ bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
+ void processUndefReads(MachineBasicBlock*);
};
}
@@ -341,6 +350,10 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Reset instruction counter in each basic block.
CurInstr = 0;
+ // Set up UndefReads to track undefined register reads.
+ UndefReads.clear();
+ LiveUnits.clear();
+
// Set up LiveRegs to represent registers entering MBB.
if (!LiveRegs)
LiveRegs = new LiveReg[NumRegs];
@@ -448,10 +461,46 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
processDefs(MI, !DomP.first);
}
+/// \brief Return true if it makes sense to break dependence on a partial def
+/// or undef use.
+bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
+ int rx = regIndex(MI->getOperand(OpIdx).getReg());
+ if (rx < 0)
+ return false;
+
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ return true;
+ }
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+ DEBUG(dbgs() << ": OK .\n");
+ return false;
+ }
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+ return false;
+}
+
// Update def-ages for registers defined by MI.
// If Kill is set, also kill off DomainValues clobbered by the defs.
+//
+// Also break dependencies on partial defs and undef uses.
void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
assert(!MI->isDebugValue() && "Won't process debug values");
+
+ // Break dependence on undef uses. Do this before updating LiveRegs below.
+ unsigned OpNum;
+ unsigned Pref = TII->getUndefRegClearance(MI, OpNum, TRI);
+ if (Pref) {
+ if (shouldBreakDependence(MI, OpNum, Pref))
+ UndefReads.push_back(std::make_pair(MI, OpNum));
+ }
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
@@ -471,37 +520,58 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
<< '\t' << *MI);
+ // Check clearance before partial register updates.
+ // Call breakDependence before setting LiveRegs[rx].Def.
+ unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ if (Pref && shouldBreakDependence(MI, i, Pref))
+ TII->breakPartialRegDependency(MI, i, TRI);
+
// How many instructions since rx was last written?
- unsigned Clearance = CurInstr - LiveRegs[rx].Def;
LiveRegs[rx].Def = CurInstr;
// Kill off domains redefined by generic instructions.
if (Kill)
kill(rx);
+ }
+ ++CurInstr;
+}
- // Verify clearance before partial register updates.
- unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
- if (!Pref)
- continue;
- DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
- if (Pref > Clearance) {
- DEBUG(dbgs() << ": Break dependency.\n");
- TII->breakPartialRegDependency(MI, i, TRI);
- continue;
- }
-
- // The current clearance seems OK, but we may be ignoring a def from a
- // back-edge.
- if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
- DEBUG(dbgs() << ": OK.\n");
- continue;
- }
+/// \brief Break false dependencies on undefined register reads.
+///
+/// Walk the block backward computing precise liveness. This is expensive, so we
+/// only do it on demand. Note that the occurrence of undefined register reads
+/// that should be broken is very rare, but when they occur we may have many in
+/// a single block.
+void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
+ if (UndefReads.empty())
+ return;
- // A def from an unprocessed back-edge may make us break this dependency.
- DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+ // Collect this block's live out register units.
+ LiveUnits.init(TRI);
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ LiveUnits.addLiveIns(*SI, *TRI);
}
+ MachineInstr *UndefMI = UndefReads.back().first;
+ unsigned OpIdx = UndefReads.back().second;
- ++CurInstr;
+ for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend();
+ I != E; ++I) {
+ // Update liveness, including the current instruction's defs.
+ LiveUnits.stepBackward(*I, *TRI);
+
+ if (UndefMI == &*I) {
+ if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI))
+ TII->breakPartialRegDependency(UndefMI, OpIdx, TRI);
+
+ UndefReads.pop_back();
+ if (UndefReads.empty())
+ return;
+
+ UndefMI = UndefReads.back().first;
+ OpIdx = UndefReads.back().second;
+ }
+ }
}
// A hard instruction only works in one domain. All input registers will be
@@ -549,7 +619,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Is it possible to use this collapsed register for free?
if (dv->isCollapsed()) {
// Restrict available domains to the ones in common with the operand.
- // If there are no common domains, we must pay the cross-domain
+ // If there are no common domains, we must pay the cross-domain
// penalty for this operand.
if (common) available = common;
} else if (common)
@@ -686,6 +756,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I)
visitInstr(I);
+ processUndefReads(MBB);
leaveBasicBlock(MBB);
}
@@ -698,6 +769,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
++I)
if (!I->isDebugValue())
processDefs(I, false);
+ processUndefReads(MBB);
leaveBasicBlock(MBB);
}
@@ -713,6 +785,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
delete[] FI->second;
}
LiveOuts.clear();
+ UndefReads.clear();
Avail.clear();
Allocator.DestroyAll();
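// Editor's note, not part of the patch: the shape of the on-demand backward
// liveness scan that processUndefReads() above performs, shown in isolation.
// The helper name and single (instruction, register) query are illustrative
// only; the LiveRegUnits calls mirror those in the hunk.
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Hypothetical helper: is Reg dead immediately before MI executes in MBB?
static bool isDeadAtInstr(MachineBasicBlock *MBB, MachineInstr *MI,
                          unsigned Reg, const TargetRegisterInfo *TRI) {
  LiveRegUnits Units;
  Units.init(TRI);
  // Seed with everything live out of the block.
  for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
       SE = MBB->succ_end(); SI != SE; ++SI)
    Units.addLiveIns(*SI, *TRI);
  // Walk backward, updating liveness, until the query instruction is reached.
  for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend();
       I != E; ++I) {
    Units.stepBackward(*I, *TRI);
    if (&*I == MI)
      return !Units.contains(Reg, *TRI);
  }
  return false;  // MI not found in MBB; conservatively report "live".
}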
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 1611db8..6c73fff 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -104,7 +104,7 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
}
if (DstSubReg == InsReg) {
- // No need to insert an identify copy instruction.
+ // No need to insert an identity copy instruction.
// Watch out for case like this:
// %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
// We must leave %RAX live.
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1ae7e3b..e2d0eb4 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -22,6 +22,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -31,6 +33,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
// Hidden options for help debugging.
@@ -150,14 +154,17 @@ namespace {
/// BBAnalysis - Results of if-conversion feasibility analysis indexed by
/// basic block number.
std::vector<BBInfo> BBAnalysis;
+ TargetSchedModel SchedModel;
const TargetLoweringBase *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const InstrItineraryData *InstrItins;
const MachineBranchProbabilityInfo *MBPI;
MachineRegisterInfo *MRI;
+ LiveRegUnits Redefs;
+ LiveRegUnits DontKill;
+
bool PreRegAlloc;
bool MadeChange;
int FnNum;
@@ -198,11 +205,9 @@ namespace {
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs,
SmallSet<unsigned, 4> *LaterRedefs = 0);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs,
bool IgnoreBr = false);
void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
@@ -267,7 +272,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MRI = &MF.getRegInfo();
- InstrItins = MF.getTarget().getInstrItineraryData();
+
+ const TargetSubtargetInfo &ST =
+ MF.getTarget().getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
+
if (!TII) return false;
PreRegAlloc = MRI->isSSA();
@@ -666,32 +675,28 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
bool isPredicated = TII->isPredicated(I);
bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
- if (!isCondBr) {
- if (!isPredicated) {
- BBI.NonPredSize++;
- unsigned ExtraPredCost = 0;
- unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
- &ExtraPredCost);
- if (NumCycles > 1)
- BBI.ExtraCost += NumCycles-1;
- BBI.ExtraCost2 += ExtraPredCost;
- } else if (!AlreadyPredicated) {
- // FIXME: This instruction is already predicated before the
- // if-conversion pass. It's probably something like a conditional move.
- // Mark this block unpredicable for now.
- BBI.IsUnpredicable = true;
- return;
- }
+ // A conditional branch is not predicable, but it may be eliminated.
+ if (isCondBr)
+ continue;
+
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = TII->getPredicationCost(&*I);
+ unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
}
if (BBI.ClobbersPred && !isPredicated) {
// Predicate modification instruction should end the block (except for
// already predicated instructions and end of block branches).
- if (isCondBr) {
- // A conditional branch is not predicable, but it may be eliminated.
- continue;
- }
-
// Predicate may have been modified, the subsequent (currently)
// unpredicated instructions cannot be correctly predicated.
BBI.IsUnpredicable = true;
@@ -961,64 +966,58 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
}
-/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
-/// modeled as read + write (sort like two-address instructions). These
-/// routines track register liveness and add implicit uses to if-converted
-/// instructions to conform to the model.
-static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
- const TargetRegisterInfo *TRI) {
- for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
- E = BB->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- Redefs.insert(*SubRegs);
- }
-}
-
-static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
- const TargetRegisterInfo *TRI,
- bool AddImpUse = false) {
- SmallVector<unsigned, 4> Defs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
+/// Behaves like LiveRegUnits::stepForward() but also adds implicit uses to all
+/// values defined in MI which are not live/used by MI.
+static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) {
+ if (!Ops->isReg() || !Ops->isKill())
continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
+ unsigned Reg = Ops->getReg();
+ if (Reg == 0)
continue;
- if (MO.isDef())
- Defs.push_back(Reg);
- else if (MO.isKill()) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- Redefs.erase(*SubRegs);
- }
+ Redefs.removeReg(Reg, *TRI);
}
- MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
- for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
- unsigned Reg = Defs[i];
- if (!Redefs.insert(Reg)) {
- if (AddImpUse)
- // Treat predicated update as read + write.
- MIB.addReg(Reg, RegState::Implicit | RegState::Undef);
- } else {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- Redefs.insert(*SubRegs);
- }
+ for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) {
+ if (!Ops->isReg() || !Ops->isDef())
+ continue;
+ unsigned Reg = Ops->getReg();
+ if (Reg == 0 || Redefs.contains(Reg, *TRI))
+ continue;
+ Redefs.addReg(Reg, *TRI);
+
+ MachineOperand &Op = *Ops;
+ MachineInstr *MI = Op.getParent();
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(Reg, RegState::Implicit | RegState::Undef);
}
}
-static void UpdatePredRedefs(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator E,
- SmallSet<unsigned,4> &Redefs,
- const TargetRegisterInfo *TRI) {
- while (I != E) {
- UpdatePredRedefs(I, Redefs, TRI);
- ++I;
+/**
+ * Remove kill flags from operands with registers in the @p DontKill set.
+ */
+static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill,
+ const MCRegisterInfo &MCRI) {
+ for (MIBundleOperands O(&MI); O.isValid(); ++O) {
+ if (!O->isReg() || !O->isKill())
+ continue;
+ if (DontKill.contains(O->getReg(), MCRI))
+ O->setIsKill(false);
}
}
+/**
+ * Walks a range of machine instructions and removes kill flags for registers
+ * in the @p DontKill set.
+ */
+static void RemoveKills(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ const LiveRegUnits &DontKill,
+ const MCRegisterInfo &MCRI) {
+ for ( ; I != E; ++I)
+ RemoveKills(*I, DontKill, MCRI);
+}
+
/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
///
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
@@ -1049,21 +1048,27 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
- SmallSet<unsigned, 4> Redefs;
- InitPredRedefs(CvtBBI->BB, Redefs, TRI);
- InitPredRedefs(NextBBI->BB, Redefs, TRI);
+ Redefs.init(TRI);
+ Redefs.addLiveIns(CvtBBI->BB, *TRI);
+ Redefs.addLiveIns(NextBBI->BB, *TRI);
+
+ // Compute a set of registers which must not be killed by instructions in
+ // BB1: This is everything live-in to BB2.
+ DontKill.init(TRI);
+ DontKill.addLiveIns(NextBBI->BB, *TRI);
if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
BBI.BB->removeSuccessor(CvtBBI->BB);
} else {
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+ RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
// Merge converted block into entry block.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
@@ -1148,16 +1153,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
- SmallSet<unsigned, 4> Redefs;
- InitPredRedefs(CvtBBI->BB, Redefs, TRI);
- InitPredRedefs(NextBBI->BB, Redefs, TRI);
+ Redefs.init(TRI);
+ Redefs.addLiveIns(CvtBBI->BB, *TRI);
+ Redefs.addLiveIns(NextBBI->BB, *TRI);
+
+ DontKill.clear();
bool HasEarlyExit = CvtBBI->FalseBB != NULL;
if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
@@ -1165,7 +1172,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
// Now merge the entry of the triangle with the true block.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
@@ -1276,8 +1283,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
- SmallSet<unsigned, 4> Redefs;
- InitPredRedefs(BBI1->BB, Redefs, TRI);
+ Redefs.init(TRI);
+ Redefs.addLiveIns(BBI1->BB, *TRI);
// Remove the duplicated instructions at the beginnings of both paths.
MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
@@ -1304,7 +1311,19 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
--NumDups1;
}
- UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
+ // Compute a set of registers which must not be killed by instructions in BB1:
+ // This is everything used+live in BB2 after the duplicated instructions. We
+ // can compute this set by simulating liveness backwards from the end of BB2.
+ DontKill.init(TRI);
+ for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(),
+ E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) {
+ DontKill.stepBackward(*I, *TRI);
+ }
+
+ for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;
+ ++I) {
+ Redefs.stepForward(*I, *TRI);
+ }
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
@@ -1322,6 +1341,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
BBI1->BB->erase(DI1, BBI1->BB->end());
+ // Kill flags in the true block for registers living into the false block
+ // must be removed.
+ RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);
+
// Remove 'false' block branch and find the last instruction to predicate.
BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
@@ -1380,10 +1403,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
// Predicate the 'true' block.
- PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse);
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse);
// Predicate the 'false' block.
- PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
+ PredicateBlock(*BBI2, DI2, *Cond2);
// Merge the true block into the entry of the diamond.
MergeBlocks(BBI, *BBI1, TailBB == 0);
@@ -1458,7 +1481,6 @@ static bool MaySpeculate(const MachineInstr *MI,
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs,
SmallSet<unsigned, 4> *LaterRedefs) {
bool AnyUnpred = false;
bool MaySpec = LaterRedefs != 0;
@@ -1484,7 +1506,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
// If the predicated instruction now redefines a register as the result of
// if-conversion, add an implicit kill.
- UpdatePredRedefs(I, Redefs, TRI, true);
+ UpdatePredRedefs(I, Redefs, TRI);
}
std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
@@ -1501,7 +1523,6 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
/// the destination block. Skip end of block branches if IgnoreBr is true.
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs,
bool IgnoreBr) {
MachineFunction &MF = *ToBBI.BB->getParent();
@@ -1514,8 +1535,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
MachineInstr *MI = MF.CloneMachineInstr(I);
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
- unsigned ExtraPredCost = 0;
- unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ unsigned ExtraPredCost = TII->getPredicationCost(&*I);
+ unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false);
if (NumCycles > 1)
ToBBI.ExtraCost += NumCycles-1;
ToBBI.ExtraCost2 += ExtraPredCost;
@@ -1531,7 +1552,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
// If the predicated instruction now redefines a register as the result of
// if-conversion, add an implicit kill.
- UpdatePredRedefs(MI, Redefs, TRI, true);
+ UpdatePredRedefs(MI, Redefs, TRI);
+
+ // Some kill flags may not be correct anymore.
+ if (!DontKill.empty())
+ RemoveKills(*MI, DontKill, *TRI);
}
if (!IgnoreBr) {
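// Editor's note, not part of the patch: a condensed sketch of the liveness
// bookkeeping the rewritten if-converter performs, using the same
// LiveRegUnits and MIBundleOperands calls as the hunks above. The function
// and block names are illustrative, not part of the IfConverter interface.
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

static void trackPredicationLiveness(MachineBasicBlock *TrueBB,
                                     MachineBasicBlock *FalseBB,
                                     const TargetRegisterInfo *TRI) {
  // Registers live into either converted block may be redefined by
  // predicated instructions; defs of such registers later get an implicit
  // undef use so they behave as read-modify-write.
  LiveRegUnits Redefs;
  Redefs.init(TRI);
  Redefs.addLiveIns(TrueBB, *TRI);
  Redefs.addLiveIns(FalseBB, *TRI);

  // Anything live into the false block must not be killed by the true block.
  LiveRegUnits DontKill;
  DontKill.init(TRI);
  DontKill.addLiveIns(FalseBB, *TRI);

  for (MachineBasicBlock::iterator I = TrueBB->begin(), E = TrueBB->end();
       I != E; ++I) {
    Redefs.stepForward(*I, *TRI);  // advance liveness across each instruction
    // Drop kill flags on registers the false block still needs.
    for (MIBundleOperands O(&*I); O.isValid(); ++O)
      if (O->isReg() && O->isKill() && DontKill.contains(O->getReg(), *TRI))
        O->setIsKill(false);
  }
}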
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 8910652..bb0e642 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -179,10 +179,8 @@ private:
bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
MachineInstr *LoadMI = 0);
- void insertReload(LiveInterval &NewLI, SlotIndex,
- MachineBasicBlock::iterator MI);
- void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
- SlotIndex, MachineBasicBlock::iterator MI);
+ void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI);
+ void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI);
void spillAroundUses(unsigned Reg);
void spillAll();
@@ -580,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
if (isSibling(SrcReg)) {
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- LiveRangeQuery SrcQ(SrcLI, VNI->def);
+ LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
assert(SrcQ.valueIn() && "Copy from non-existing value");
// Check if this COPY kills its source.
SVI->second.KillsSource = SrcQ.isKill();
@@ -885,12 +883,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
}
// Allocate a new register for the remat.
- LiveInterval &NewLI = Edit->createFrom(Original);
- NewLI.markNotSpillable();
+ unsigned NewVReg = Edit->createFrom(Original);
// Finally we can rematerialize OrigMI before MI.
- SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM,
TRI);
+ (void)DefIdx;
DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
<< *LIS.getInstructionFromIndex(DefIdx));
@@ -898,15 +896,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i].second);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
- MO.setReg(NewLI.reg);
+ MO.setReg(NewVReg);
MO.setIsKill();
}
}
- DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n');
- VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI));
- DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
++NumRemats;
return true;
}
@@ -1009,6 +1004,40 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
return true;
}
+#if !defined(NDEBUG)
+// Dump the range of instructions from B to E with their slot indexes.
+static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
+ MachineBasicBlock::iterator E,
+ LiveIntervals const &LIS,
+ const char *const header,
+ unsigned VReg = 0) {
+ char NextLine = '\n';
+ char SlotIndent = '\t';
+
+ if (llvm::next(B) == E) {
+ NextLine = ' ';
+ SlotIndent = ' ';
+ }
+
+ dbgs() << '\t' << header << ": " << NextLine;
+
+ for (MachineBasicBlock::iterator I = B; I != E; ++I) {
+ SlotIndex Idx = LIS.getInstructionIndex(I).getRegSlot();
+
+ // If a register was passed in and this instruction has it as a
+ // destination that is marked as an early clobber, print the
+ // early-clobber slot index.
+ if (VReg) {
+ MachineOperand *MO = I->findRegisterDefOperand(VReg);
+ if (MO && MO->isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ }
+
+ dbgs() << SlotIndent << Idx << '\t' << *I;
+ }
+}
+#endif
+
/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
@@ -1028,6 +1057,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
bool WasCopy = MI->isCopy();
unsigned ImpReg = 0;
+ bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI->getOpcode() == TargetOpcode::STACKMAP);
+
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
@@ -1039,7 +1071,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
continue;
}
// FIXME: Teach targets to deal with subregs.
- if (MO.getSubReg())
+ if (!SpillSubRegs && MO.getSubReg())
return false;
// We cannot fold a load instruction into a def.
if (LoadMI && MO.isDef())
@@ -1049,6 +1081,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
FoldOps.push_back(Idx);
}
+ MachineInstrSpan MIS(MI);
+
MachineInstr *FoldMI =
LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
: TII.foldMemoryOperand(MI, FoldOps, StackSlot);
@@ -1075,16 +1109,24 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
// FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def");
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
- if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) {
+ if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
- if (VNInfo *VNI = LI->getVNInfoAt(Idx))
- LI->removeValNo(VNI);
+ if (VNInfo *VNI = LR->getVNInfoAt(Idx))
+ LR->removeValNo(VNI);
}
}
}
+
LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
MI->eraseFromParent();
+ // Insert any new instructions other than FoldMI into the LIS maps.
+ assert(!MIS.empty() && "Unexpected empty span of instructions!");
+ for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end();
+ MII != End; ++MII)
+ if (&*MII != FoldMI)
+ LIS.InsertMachineInstrInMaps(&*MII);
+
// TII.foldMemoryOperand may have left some implicit operands on the
// instruction. Strip them.
if (ImpReg)
@@ -1096,8 +1138,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
FoldMI->RemoveOperand(i - 1);
}
- DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t'
- << *FoldMI);
+ DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
+ "folded"));
+
if (!WasCopy)
++NumFolded;
else if (Ops.front().second == 0)
@@ -1107,36 +1150,35 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
return true;
}
-/// insertReload - Insert a reload of NewLI.reg before MI.
-void InlineSpiller::insertReload(LiveInterval &NewLI,
+void InlineSpiller::insertReload(unsigned NewVReg,
SlotIndex Idx,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
- TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
- MRI.getRegClass(NewLI.reg), &TRI);
- --MI; // Point to load instruction.
- SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
- // Some (out-of-tree) targets have EC reload instructions.
- if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg))
- if (MO->isEarlyClobber())
- LoadIdx = LoadIdx.getRegSlot(true);
- DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
- VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
+
+ MachineInstrSpan MIS(MI);
+ TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
+ MRI.getRegClass(NewVReg), &TRI);
+
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
+
+ DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
+ NewVReg));
++NumReloads;
}
-/// insertSpill - Insert a spill of NewLI.reg after MI.
-void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
- SlotIndex Idx, MachineBasicBlock::iterator MI) {
+/// insertSpill - Insert a spill of NewVReg after MI.
+void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
+ MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
- TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
- MRI.getRegClass(NewLI.reg), &TRI);
- --MI; // Point to store instruction.
- SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
- DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
- VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
+
+ MachineInstrSpan MIS(MI);
+ TII.storeRegToStackSlot(MBB, llvm::next(MI), NewVReg, isKill, StackSlot,
+ MRI.getRegClass(NewVReg), &TRI);
+
+ LIS.InsertMachineInstrRangeInMaps(llvm::next(MI), MIS.end());
+
+ DEBUG(dumpMachineInstrRangeWithSlotIndex(llvm::next(MI), MIS.end(), LIS,
+ "spill"));
++NumSpills;
}
@@ -1152,7 +1194,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Debug values are not allowed to affect codegen.
if (MI->isDebugValue()) {
// Modify DBG_VALUE now that the value is in a spill slot.
- bool IsIndirect = MI->getOperand(1).isImm();
+ bool IsIndirect = MI->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
const MDNode *MDPtr = MI->getOperand(2).getMetadata();
DebugLoc DL = MI->getDebugLoc();
@@ -1212,19 +1254,18 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
if (foldMemoryOperand(Ops))
continue;
- // Allocate interval around instruction.
+ // Create a new virtual register for spill/fill.
// FIXME: Infer regclass from instruction alone.
- LiveInterval &NewLI = Edit->createFrom(Reg);
- NewLI.markNotSpillable();
+ unsigned NewVReg = Edit->createFrom(Reg);
if (RI.Reads)
- insertReload(NewLI, Idx, MI);
+ insertReload(NewVReg, Idx, MI);
// Rewrite instruction operands.
bool hasLiveDef = false;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
- MO.setReg(NewLI.reg);
+ MO.setReg(NewVReg);
if (MO.isUse()) {
if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
MO.setIsKill();
@@ -1233,21 +1274,12 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
hasLiveDef = true;
}
}
- DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
+ DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');
// FIXME: Use a second vreg if instruction has no tied ops.
- if (RI.Writes) {
+ if (RI.Writes)
if (hasLiveDef)
- insertSpill(NewLI, OldLI, Idx, MI);
- else {
- // This instruction defines a dead value. We don't need to spill it,
- // but do create a live range for the dead value.
- VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI));
- }
- }
-
- DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ insertSpill(NewVReg, true, MI);
}
}
@@ -1266,8 +1298,8 @@ void InlineSpiller::spillAll() {
assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]),
- StackInt->getValNumInfo(0));
+ StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]),
+ StackInt->getValNumInfo(0));
DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
// Spill around uses of all RegsToSpill.
@@ -1308,8 +1340,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
DEBUG(dbgs() << "Inline spilling "
<< MRI.getRegClass(edit.getReg())->getName()
- << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent()
- << "\nFrom original " << LIS.getInterval(Original) << '\n');
+ << ':' << edit.getParent()
+ << "\nFrom original " << PrintReg(Original) << '\n');
assert(edit.getParent().isSpillable() &&
"Attempting to spill already spilled value.");
assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
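// Editor's note, not part of the patch: the MachineInstrSpan idiom used by
// insertReload()/insertSpill() and foldMemoryOperand() above, restated in
// isolation. The span captures whatever instructions a target hook inserts
// before MI so they can all be fed to the slot-index maps; the helper name
// and parameter list are illustrative.
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

static void reloadBefore(MachineBasicBlock::iterator MI, unsigned VReg,
                         int StackSlot, const TargetInstrInfo &TII,
                         const TargetRegisterInfo &TRI,
                         MachineRegisterInfo &MRI, LiveIntervals &LIS) {
  MachineBasicBlock &MBB = *MI->getParent();
  MachineInstrSpan MIS(MI);  // remembers the neighborhood around MI
  // The target may expand this into several instructions; the span sees them.
  TII.loadRegFromStackSlot(MBB, MI, VReg, StackSlot,
                           MRI.getRegClass(VReg), &TRI);
  // Everything now in [MIS.begin(), MI) is newly created; give it slot indexes.
  LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
}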
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index a8e711e..427225d 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -204,11 +204,11 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
// Fixed interference.
for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
LiveInterval::iterator &I = RegUnits[i].FixedI;
- LiveInterval *LI = RegUnits[i].Fixed;
- if (I == LI->end() || I->start >= Stop)
+ LiveRange *LR = RegUnits[i].Fixed;
+ if (I == LR->end() || I->start >= Stop)
continue;
- I = LI->advanceTo(I, Stop);
- bool Backup = I == LI->end() || I->start >= Stop;
+ I = LR->advanceTo(I, Stop);
+ bool Backup = I == LR->end() || I->start >= Stop;
if (Backup)
--I;
SlotIndex StopI = I->end;
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index c02fb9a..800f705 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -72,7 +72,7 @@ class InterferenceCache {
unsigned VirtTag;
/// Fixed interference in RegUnit.
- LiveInterval *Fixed;
+ LiveRange *Fixed;
/// Iterator pointing into the fixed RegUnit interference.
LiveInterval::iterator FixedI;
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index d894f66..c38d4fb 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -485,11 +485,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memset: {
- Type *IntPtr = TD.getIntPtrType(Context);
+ Value *Op0 = CI->getArgOperand(0);
+ Type *IntPtr = TD.getIntPtrType(Op0->getType());
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
- Ops[0] = CI->getArgOperand(0);
+ Ops[0] = Op0;
// Extend the amount to i32.
Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
Type::getInt32Ty(Context),
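// Editor's note, not part of the patch: the memset change above matters
// because pointer width can differ per address space, so the size cast must
// use the integer type matching the destination pointer's type rather than
// the default address-space-0 width. A minimal sketch of the DataLayout
// query; the helper name is illustrative.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static Type *intPtrTypeForDest(const DataLayout &TD, Value *Dest) {
  // With a layout such as "p:64:64-p1:32:32", a plain i8* yields i64 here
  // while an addrspace(1) pointer yields i32; getIntPtrType(Context) would
  // always return the address-space-0 width.
  return TD.getIntPtrType(Dest->getType());
}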
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 6c9b2e5..ad2c553 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -175,12 +175,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = 0;
- MCAsmBackend *MAB = 0;
- if (ShowMCEncoding) {
+ if (ShowMCEncoding)
MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context);
- MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
- }
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
+ TargetCPU);
MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
getVerboseAsm(),
hasMCUseLoc(),
@@ -197,7 +196,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI,
*Context);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(),
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (MCE == 0 || MAB == 0)
return true;
@@ -232,7 +231,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
/// get machine code emitted. This uses a JITCodeEmitter object to handle
/// actually outputting the machine code and resolving things like the address
-/// of functions. This method should returns true if machine code emission is
+/// of functions. This method should return true if machine code emission is
/// not supported.
///
bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
@@ -271,7 +270,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
STI, *Ctx);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
+ TargetCPU);
if (MCE == 0 || MAB == 0)
return true;
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 85bed46..25645e0 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -131,7 +131,8 @@ class UserValue {
/// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
/// is live. Returns true if any changes were made.
- bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+ bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
+ LiveIntervals &LIS);
public:
/// UserValue - Create a new UserValue.
@@ -219,13 +220,13 @@ public:
/// End points where VNI is no longer live are added to Kills.
/// @param Idx Starting point for the definition.
/// @param LocNo Location number to propagate.
- /// @param LI Restrict liveness to where LI has the value VNI. May be null.
- /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param LR Restrict liveness to where LR has the value VNI. May be null.
+ /// @param VNI When LR is not null, this is the value to restrict to.
/// @param Kills Append end points of VNI's live range to Kills.
/// @param LIS Live intervals analysis.
/// @param MDT Dominator tree.
void extendDef(SlotIndex Idx, unsigned LocNo,
- LiveInterval *LI, const VNInfo *VNI,
+ LiveRange *LR, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
UserValueScopes &UVS);
@@ -251,7 +252,8 @@ public:
/// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
- bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
+ bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
+ LiveIntervals &LIS);
/// rewriteLocations - Rewrite virtual register locations according to the
/// provided virtual register map.
@@ -345,7 +347,7 @@ public:
void mapVirtReg(unsigned VirtReg, UserValue *EC);
/// splitRegister - Replace all references to OldReg with NewRegs.
- void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+ void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
/// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM);
@@ -455,9 +457,10 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
}
// Get or create the UserValue for (variable,offset).
- bool IsIndirect = MI->getOperand(1).isImm();
+ bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
const MDNode *Var = MI->getOperand(2).getMetadata();
UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc());
UV->addDef(Idx, MI->getOperand(0));
return true;
@@ -492,7 +495,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
}
void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
- LiveInterval *LI, const VNInfo *VNI,
+ LiveRange *LR, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
UserValueScopes &UVS) {
@@ -506,15 +509,15 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
// Limit to VNI's live range.
bool ToEnd = true;
- if (LI && VNI) {
- LiveRange *Range = LI->getLiveRangeContaining(Start);
- if (!Range || Range->valno != VNI) {
+ if (LR && VNI) {
+ LiveInterval::Segment *Segment = LR->getSegmentContaining(Start);
+ if (!Segment || Segment->valno != VNI) {
if (Kills)
Kills->push_back(Start);
continue;
}
- if (Range->end < Stop)
- Stop = Range->end, ToEnd = false;
+ if (Segment->end < Stop)
+ Stop = Segment->end, ToEnd = false;
}
// There could already be a short def at Start.
@@ -666,10 +669,10 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
// For physregs, use the live range of the first regunit as a guide.
unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI);
- LiveInterval *LI = &LIS.getRegUnit(Unit);
- const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ LiveRange *LR = &LIS.getRegUnit(Unit);
+ const VNInfo *VNI = LR->getVNInfoAt(Idx);
// Don't track copies from physregs; it is too expensive.
- extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, LR, VNI, 0, LIS, MDT, UVS);
}
// Finally, erase all the undefs.
@@ -729,7 +732,8 @@ LiveDebugVariables::~LiveDebugVariables() {
//===----------------------------------------------------------------------===//
bool
-UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) {
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
+ LiveIntervals& LIS) {
DEBUG({
dbgs() << "Splitting Loc" << OldLocNo << '\t';
print(dbgs(), 0);
@@ -738,7 +742,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) {
LocMap::iterator LocMapI;
LocMapI.setMap(locInts);
for (unsigned i = 0; i != NewRegs.size(); ++i) {
- LiveInterval *LI = NewRegs[i];
+ LiveInterval *LI = &LIS.getInterval(NewRegs[i]);
if (LI->empty())
continue;
@@ -827,7 +831,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) {
}
bool
-UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
+ LiveIntervals &LIS) {
bool DidChange = false;
// Split locations referring to OldReg. Iterate backwards so splitLocation can
// safely erase unused locations.
@@ -836,15 +841,15 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
const MachineOperand *Loc = &locations[LocNo];
if (!Loc->isReg() || Loc->getReg() != OldReg)
continue;
- DidChange |= splitLocation(LocNo, NewRegs);
+ DidChange |= splitLocation(LocNo, NewRegs, LIS);
}
return DidChange;
}
-void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
bool DidChange = false;
for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
- DidChange |= UV->splitRegister(OldReg, NewRegs);
+ DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
if (!DidChange)
return;
@@ -852,11 +857,11 @@ void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
// Map all of the new virtual registers.
UserValue *UV = lookupVirtReg(OldReg);
for (unsigned i = 0; i != NewRegs.size(); ++i)
- mapVirtReg(NewRegs[i]->reg, UV);
+ mapVirtReg(NewRegs[i], UV);
}
void LiveDebugVariables::
-splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
+splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
}
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 3ce3c39..58a3f0f 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -27,6 +27,7 @@
namespace llvm {
class LiveInterval;
+class LiveIntervals;
class VirtRegMap;
class LiveDebugVariables : public MachineFunctionPass {
@@ -47,7 +48,8 @@ public:
/// splitRegister - Move any user variables in OldReg to the live ranges in
/// NewRegs where they are live. Mark the values as unavailable where no new
/// register is live.
- void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
+ void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
+ LiveIntervals &LIS);
/// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
/// that happened during register allocation.
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 6be6bf3..2b8feb8 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -9,12 +9,12 @@
//
// This file implements the LiveRange and LiveInterval classes. Given some
// numbering of each the machine instructions an interval [i, j) is said to be a
-// live interval for register v if there is no instruction with number j' > j
+// live range for register v if there is no instruction with number j' >= j
// such that v is live at j' and there is no instruction with number i' < i such
-// that v is live at i'. In this implementation intervals can have holes,
-// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
-// individual range is represented as an instance of LiveRange, and the whole
-// interval is represented as an instance of LiveInterval.
+// that v is live at i'. In this implementation ranges can have holes,
+// i.e. a range might look like [1,20), [50,65), [1000,1001). Each
+// individual segment is represented as an instance of LiveRange::Segment,
+// and the whole range is represented as an instance of LiveRange.
//
//===----------------------------------------------------------------------===//
@@ -31,14 +31,14 @@
#include <algorithm>
using namespace llvm;
-LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+LiveRange::iterator LiveRange::find(SlotIndex Pos) {
// This algorithm is basically std::upper_bound.
// Unfortunately, std::upper_bound cannot be used with mixed types until we
// adopt C++0x. Many libraries can do it, but not all.
if (empty() || Pos >= endIndex())
return end();
iterator I = begin();
- size_t Len = ranges.size();
+ size_t Len = size();
do {
size_t Mid = Len >> 1;
if (Pos < I[Mid].end)
@@ -49,13 +49,13 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
return I;
}
-VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
- VNInfo::Allocator &VNInfoAllocator) {
+VNInfo *LiveRange::createDeadDef(SlotIndex Def,
+ VNInfo::Allocator &VNInfoAllocator) {
assert(!Def.isDead() && "Cannot define a value at the dead slot");
iterator I = find(Def);
if (I == end()) {
VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
- ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI));
+ segments.push_back(Segment(Def, Def.getDeadSlot(), VNI));
return VNI;
}
if (SlotIndex::isSameInstr(Def, I->start)) {
@@ -73,11 +73,11 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
}
assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
- ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI));
+ segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI));
return VNI;
}
-// overlaps - Return true if the intersection of the two live intervals is
+// overlaps - Return true if the intersection of the two live ranges is
// not empty.
//
// An example for overlaps():
@@ -86,7 +86,7 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
// 4: B = ...
// 8: C = A + B ;; last use of A
//
-// The live intervals should look like:
+// The live ranges should look like:
//
// A = [3, 11)
// B = [7, x)
@@ -95,9 +95,9 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
// A->overlaps(C) should return false since we want to be able to join
// A and C.
//
-bool LiveInterval::overlapsFrom(const LiveInterval& other,
- const_iterator StartPos) const {
- assert(!empty() && "empty interval");
+bool LiveRange::overlapsFrom(const LiveRange& other,
+ const_iterator StartPos) const {
+ assert(!empty() && "empty range");
const_iterator i = begin();
const_iterator ie = end();
const_iterator j = StartPos;
@@ -108,13 +108,13 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
if (i->start < j->start) {
i = std::upper_bound(i, ie, j->start);
- if (i != ranges.begin()) --i;
+ if (i != begin()) --i;
} else if (j->start < i->start) {
++StartPos;
if (StartPos != other.end() && StartPos->start <= i->start) {
assert(StartPos < other.end() && i < end());
j = std::upper_bound(j, je, i->start);
- if (j != other.ranges.begin()) --j;
+ if (j != other.begin()) --j;
}
} else {
return true;
@@ -136,10 +136,9 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
return false;
}
-bool LiveInterval::overlaps(const LiveInterval &Other,
- const CoalescerPair &CP,
- const SlotIndexes &Indexes) const {
- assert(!empty() && "empty interval");
+bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
+ const SlotIndexes &Indexes) const {
+ assert(!empty() && "empty range");
if (Other.empty())
return false;
@@ -178,9 +177,9 @@ bool LiveInterval::overlaps(const LiveInterval &Other,
}
}
-/// overlaps - Return true if the live interval overlaps a range specified
+/// overlaps - Return true if the live range overlaps an interval specified
/// by [Start, End).
-bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
+bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const {
assert(Start < End && "Invalid range");
const_iterator I = std::lower_bound(begin(), end(), End);
return I != begin() && (--I)->end > Start;
@@ -190,7 +189,7 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
/// ValNo is dead, remove it. If it is the largest value number, just nuke it
/// (and any other deleted values neighboring it), otherwise mark it as ~1U so
/// it can be nuked later.
-void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
+void LiveRange::markValNoForDeletion(VNInfo *ValNo) {
if (ValNo->id == getNumValNums()-1) {
do {
valnos.pop_back();
@@ -202,137 +201,135 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
/// RenumberValues - Renumber all values in order of appearance and delete the
/// remaining unused values.
-void LiveInterval::RenumberValues(LiveIntervals &lis) {
+void LiveRange::RenumberValues() {
SmallPtrSet<VNInfo*, 8> Seen;
valnos.clear();
for (const_iterator I = begin(), E = end(); I != E; ++I) {
VNInfo *VNI = I->valno;
if (!Seen.insert(VNI))
continue;
- assert(!VNI->isUnused() && "Unused valno used by live range");
+ assert(!VNI->isUnused() && "Unused valno used by live segment");
VNI->id = (unsigned)valnos.size();
valnos.push_back(VNI);
}
}
-/// extendIntervalEndTo - This method is used when we want to extend the range
-/// specified by I to end at the specified endpoint. To do this, we should
-/// merge and eliminate all ranges that this will overlap with. The iterator is
-/// not invalidated.
-void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
- assert(I != ranges.end() && "Not a valid interval!");
+/// This method is used when we want to extend the segment specified by I to end
+/// at the specified endpoint. To do this, we should merge and eliminate all
+/// segments that this will overlap with. The iterator is not invalidated.
+void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) {
+ assert(I != end() && "Not a valid segment!");
VNInfo *ValNo = I->valno;
- // Search for the first interval that we can't merge with.
- Ranges::iterator MergeTo = llvm::next(I);
- for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = llvm::next(I);
+ for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) {
assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
}
- // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+ // If NewEnd was in the middle of a segment, make sure to get its endpoint.
I->end = std::max(NewEnd, prior(MergeTo)->end);
- // If the newly formed range now touches the range after it and if they have
- // the same value number, merge the two ranges into one range.
- if (MergeTo != ranges.end() && MergeTo->start <= I->end &&
+ // If the newly formed segment now touches the segment after it and if they
+ // have the same value number, merge the two segments into one segment.
+ if (MergeTo != end() && MergeTo->start <= I->end &&
MergeTo->valno == ValNo) {
I->end = MergeTo->end;
++MergeTo;
}
- // Erase any dead ranges.
- ranges.erase(llvm::next(I), MergeTo);
+ // Erase any dead segments.
+ segments.erase(llvm::next(I), MergeTo);
}
-/// extendIntervalStartTo - This method is used when we want to extend the range
-/// specified by I to start at the specified endpoint. To do this, we should
-/// merge and eliminate all ranges that this will overlap with.
-LiveInterval::Ranges::iterator
-LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
- assert(I != ranges.end() && "Not a valid interval!");
+/// This method is used when we want to extend the segment specified by I to
+/// start at the specified endpoint. To do this, we should merge and eliminate
+/// all segments that this will overlap with.
+LiveRange::iterator
+LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) {
+ assert(I != end() && "Not a valid segment!");
VNInfo *ValNo = I->valno;
- // Search for the first interval that we can't merge with.
- Ranges::iterator MergeTo = I;
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = I;
do {
- if (MergeTo == ranges.begin()) {
+ if (MergeTo == begin()) {
I->start = NewStart;
- ranges.erase(MergeTo, I);
+ segments.erase(MergeTo, I);
return I;
}
assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
--MergeTo;
} while (NewStart <= MergeTo->start);
- // If we start in the middle of another interval, just delete a range and
- // extend that interval.
+ // If we start in the middle of another segment, just delete a range and
+ // extend that segment.
if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
MergeTo->end = I->end;
} else {
- // Otherwise, extend the interval right after.
+ // Otherwise, extend the segment right after.
++MergeTo;
MergeTo->start = NewStart;
MergeTo->end = I->end;
}
- ranges.erase(llvm::next(MergeTo), llvm::next(I));
+ segments.erase(llvm::next(MergeTo), llvm::next(I));
return MergeTo;
}
-LiveInterval::iterator
-LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
- SlotIndex Start = LR.start, End = LR.end;
- iterator it = std::upper_bound(From, ranges.end(), Start);
+LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) {
+ SlotIndex Start = S.start, End = S.end;
+ iterator it = std::upper_bound(From, end(), Start);
- // If the inserted interval starts in the middle or right at the end of
- // another interval, just extend that interval to contain the range of LR.
- if (it != ranges.begin()) {
+ // If the inserted segment starts in the middle or right at the end of
+ // another segment, just extend that segment to contain the segment of S.
+ if (it != begin()) {
iterator B = prior(it);
- if (LR.valno == B->valno) {
+ if (S.valno == B->valno) {
if (B->start <= Start && B->end >= Start) {
- extendIntervalEndTo(B, End);
+ extendSegmentEndTo(B, End);
return B;
}
} else {
- // Check to make sure that we are not overlapping two live ranges with
+ // Check to make sure that we are not overlapping two live segments with
// different valno's.
assert(B->end <= Start &&
- "Cannot overlap two LiveRanges with differing ValID's"
+ "Cannot overlap two segments with differing ValID's"
" (did you def the same reg twice in a MachineInstr?)");
}
}
- // Otherwise, if this range ends in the middle of, or right next to, another
- // interval, merge it into that interval.
- if (it != ranges.end()) {
- if (LR.valno == it->valno) {
+ // Otherwise, if this segment ends in the middle of, or right next to, another
+ // segment, merge it into that segment.
+ if (it != end()) {
+ if (S.valno == it->valno) {
if (it->start <= End) {
- it = extendIntervalStartTo(it, Start);
+ it = extendSegmentStartTo(it, Start);
- // If LR is a complete superset of an interval, we may need to grow its
+ // If S is a complete superset of a segment, we may need to grow its
// endpoint as well.
if (End > it->end)
- extendIntervalEndTo(it, End);
+ extendSegmentEndTo(it, End);
return it;
}
} else {
- // Check to make sure that we are not overlapping two live ranges with
+ // Check to make sure that we are not overlapping two live segments with
// different valno's.
assert(it->start >= End &&
- "Cannot overlap two LiveRanges with differing ValID's");
+ "Cannot overlap two segments with differing ValID's");
}
}
- // Otherwise, this is just a new range that doesn't interact with anything.
+ // Otherwise, this is just a new segment that doesn't interact with anything.
// Insert it.
- return ranges.insert(it, LR);
+ return segments.insert(it, S);
}
-/// extendInBlock - If this interval is live before Kill in the basic
+/// extendInBlock - If this range is live before Kill in the basic
/// block that starts at StartIdx, extend it to be live up to Kill and return
/// the value. If there is no live range before Kill, return NULL.
-VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
+VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
if (empty())
return 0;
iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
@@ -342,20 +339,21 @@ VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
if (I->end <= StartIdx)
return 0;
if (I->end < Kill)
- extendIntervalEndTo(I, Kill);
+ extendSegmentEndTo(I, Kill);
return I->valno;
}
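[Editorial sketch, not part of the patch] The caller pattern this API is built for appears later in this diff, in LiveIntervals::shrinkToUses: each use either extends an in-block definition via extendInBlock, or, when the value is live-in, gets an explicit segment. A minimal condensation of that pattern under the renamed LiveRange API above; the helper name is hypothetical.

#include "llvm/CodeGen/LiveInterval.h"  // assumed to declare LiveRange et al.
using namespace llvm;

// Make VNI's value live up to Idx inside the block that starts at BlockStart.
// extendInBlock() covers the case where the value is already defined earlier
// in the block; otherwise the value is live-in and we add a segment covering
// [BlockStart, Idx) explicitly.
static void extendValueInBlock(LiveRange &LR, SlotIndex BlockStart,
                               SlotIndex Idx, VNInfo *VNI) {
  if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) {
    (void)ExtVNI;
    assert(ExtVNI == VNI && "Unexpected existing value number");
    return;
  }
  LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
}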
-/// removeRange - Remove the specified range from this interval. Note that
-/// the range must be in a single LiveRange in its entirety.
-void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
- bool RemoveDeadValNo) {
- // Find the LiveRange containing this span.
- Ranges::iterator I = find(Start);
- assert(I != ranges.end() && "Range is not in interval!");
- assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
+/// Remove the specified segment from this range. Note that the segment must
+/// be in a single Segment in its entirety.
+void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
+ bool RemoveDeadValNo) {
+ // Find the Segment containing this span.
+ iterator I = find(Start);
+ assert(I != end() && "Segment is not in range!");
+ assert(I->containsInterval(Start, End)
+ && "Segment is not entirely in range!");
- // If the span we are removing is at the start of the LiveRange, adjust it.
+ // If the span we are removing is at the start of the Segment, adjust it.
VNInfo *ValNo = I->valno;
if (I->start == Start) {
if (I->end == End) {
@@ -373,54 +371,50 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
}
}
- ranges.erase(I); // Removed the whole LiveRange.
+ segments.erase(I); // Removed the whole Segment.
} else
I->start = End;
return;
}
- // Otherwise if the span we are removing is at the end of the LiveRange,
+ // Otherwise if the span we are removing is at the end of the Segment,
// adjust the other way.
if (I->end == End) {
I->end = Start;
return;
}
- // Otherwise, we are splitting the LiveRange into two pieces.
+ // Otherwise, we are splitting the Segment into two pieces.
SlotIndex OldEnd = I->end;
- I->end = Start; // Trim the old interval.
+ I->end = Start; // Trim the old segment.
// Insert the new one.
- ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
+ segments.insert(llvm::next(I), Segment(End, OldEnd, ValNo));
}
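A worked example may make the three branches above easier to follow (hypothetical slot indexes, written in the [start,end:valno-id) notation used by operator<< further down in this file):

// Starting from a range that holds the single segment [8,40:0):
//   removeSegment(8, 16)   leaves [16,40:0)           (trim at the start)
//   removeSegment(32, 40)  leaves [8,32:0)            (trim at the end)
//   removeSegment(16, 24)  leaves [8,16:0)[24,40:0)   (split into two)
// In every case [Start,End) must lie entirely inside one existing segment,
// as the assertions at the top of removeSegment enforce.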
-/// removeValNo - Remove all the ranges defined by the specified value#.
+/// removeValNo - Remove all the segments defined by the specified value#.
/// Also remove the value# from value# list.
-void LiveInterval::removeValNo(VNInfo *ValNo) {
+void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- Ranges::iterator I = ranges.end();
- Ranges::iterator E = ranges.begin();
+ iterator I = end();
+ iterator E = begin();
do {
--I;
if (I->valno == ValNo)
- ranges.erase(I);
+ segments.erase(I);
} while (I != E);
// Now that ValNo is dead, remove it.
markValNoForDeletion(ValNo);
}
-/// join - Join two live intervals (this, and other) together. This applies
-/// mappings to the value numbers in the LHS/RHS intervals as specified. If
-/// the intervals are not joinable, this aborts.
-void LiveInterval::join(LiveInterval &Other,
- const int *LHSValNoAssignments,
- const int *RHSValNoAssignments,
- SmallVectorImpl<VNInfo *> &NewVNInfo,
- MachineRegisterInfo *MRI) {
+void LiveRange::join(LiveRange &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVectorImpl<VNInfo *> &NewVNInfo) {
verify();
- // Determine if any of our live range values are mapped. This is uncommon, so
- // we want to avoid the interval scan if not.
+ // Determine if any of our values are mapped. This is uncommon, so we want
+ // to avoid the range scan if not.
bool MustMapCurValNos = false;
unsigned NumVals = getNumValNums();
unsigned NumNewVals = NewVNInfo.size();
@@ -433,8 +427,7 @@ void LiveInterval::join(LiveInterval &Other,
}
}
- // If we have to apply a mapping to our base interval assignment, rewrite it
- // now.
+ // If we have to apply a mapping to our base range assignment, rewrite it now.
if (MustMapCurValNos && !empty()) {
// Map the first live range.
@@ -445,12 +438,12 @@ void LiveInterval::join(LiveInterval &Other,
assert(nextValNo != 0 && "Huh?");
// If this live range has the same value # as its immediate predecessor,
- // and if they are neighbors, remove one LiveRange. This happens when we
+ // and if they are neighbors, remove one Segment. This happens when we
// have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
if (OutIt->valno == nextValNo && OutIt->end == I->start) {
OutIt->end = I->end;
} else {
- // Didn't merge. Move OutIt to the next interval,
+ // Didn't merge. Move OutIt to the next segment,
++OutIt;
OutIt->valno = nextValNo;
if (OutIt != I) {
@@ -459,9 +452,9 @@ void LiveInterval::join(LiveInterval &Other,
}
}
}
- // If we merge some live ranges, chop off the end.
+ // If we merge some segments, chop off the end.
++OutIt;
- ranges.erase(OutIt, end());
+ segments.erase(OutIt, end());
}
// Rewrite Other values before changing the VNInfo ids.
@@ -472,7 +465,7 @@ void LiveInterval::join(LiveInterval &Other,
I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]];
// Update val# info. Renumber them and make sure they all belong to this
- // LiveInterval now. Also remove dead val#'s.
+ // LiveRange now. Also remove dead val#'s.
unsigned NumValNos = 0;
for (unsigned i = 0; i < NumNewVals; ++i) {
VNInfo *VNI = NewVNInfo[i];
@@ -487,31 +480,31 @@ void LiveInterval::join(LiveInterval &Other,
if (NumNewVals < NumVals)
valnos.resize(NumNewVals); // shrinkify
- // Okay, now insert the RHS live ranges into the LHS.
+ // Okay, now insert the RHS live segments into the LHS.
LiveRangeUpdater Updater(this);
for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
Updater.add(*I);
}
-/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
-/// interval as the specified value number. The LiveRanges in RHS are
-/// allowed to overlap with LiveRanges in the current interval, but only if
-/// the overlapping LiveRanges have the specified value number.
-void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
- VNInfo *LHSValNo) {
+/// Merge all of the segments in RHS into this live range as the specified
+/// value number. The segments in RHS are allowed to overlap with segments in
+/// the current range, but only if the overlapping segments have the
+/// specified value number.
+void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS,
+ VNInfo *LHSValNo) {
LiveRangeUpdater Updater(this);
for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
Updater.add(I->start, I->end, LHSValNo);
}
-/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
-/// in RHS into this live interval as the specified value number.
-/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
-/// current interval, it will replace the value numbers of the overlaped
-/// live ranges with the specified value number.
-void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
- const VNInfo *RHSValNo,
- VNInfo *LHSValNo) {
+/// MergeValueInAsValue - Merge all of the live segments of a specific val#
+/// in RHS into this live range as the specified value number.
+/// The segments in RHS are allowed to overlap with segments in the
+/// current range; it will replace the value numbers of the overlapped
+/// segments with the specified value number.
+void LiveRange::MergeValueInAsValue(const LiveRange &RHS,
+ const VNInfo *RHSValNo,
+ VNInfo *LHSValNo) {
LiveRangeUpdater Updater(this);
for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
if (I->valno == RHSValNo)
@@ -520,9 +513,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
/// MergeValueNumberInto - This method is called when two value numbers
/// are found to be equivalent. This eliminates V1, replacing all
-/// LiveRanges with the V1 value number with the V2 value number. This can
+/// segments with the V1 value number with the V2 value number. This can
/// cause merging of V1/V2 values numbers and compaction of the value space.
-VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
assert(V1 != V2 && "Identical value#'s are always equivalent!");
// This code actually merges the (numerically) larger value number into the
@@ -536,37 +529,37 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
std::swap(V1, V2);
}
- // Merge V1 live ranges into V2.
+ // Merge V1 segments into V2.
for (iterator I = begin(); I != end(); ) {
- iterator LR = I++;
- if (LR->valno != V1) continue; // Not a V1 LiveRange.
+ iterator S = I++;
+ if (S->valno != V1) continue; // Not a V1 Segment.
// Okay, we found a V1 live range. If it had a previous, touching, V2 live
// range, extend it.
- if (LR != begin()) {
- iterator Prev = LR-1;
- if (Prev->valno == V2 && Prev->end == LR->start) {
- Prev->end = LR->end;
+ if (S != begin()) {
+ iterator Prev = S-1;
+ if (Prev->valno == V2 && Prev->end == S->start) {
+ Prev->end = S->end;
// Erase this live-range.
- ranges.erase(LR);
+ segments.erase(S);
I = Prev+1;
- LR = Prev;
+ S = Prev;
}
}
// Okay, now we have a V1 or V2 live range that is maximally merged forward.
// Ensure that it is a V2 live-range.
- LR->valno = V2;
+ S->valno = V2;
- // If we can merge it into later V2 live ranges, do so now. We ignore any
- // following V1 live ranges, as they will be merged in subsequent iterations
+ // If we can merge it into later V2 segments, do so now. We ignore any
+ // following V1 segments, as they will be merged in subsequent iterations
// of the loop.
if (I != end()) {
- if (I->start == LR->end && I->valno == V2) {
- LR->end = I->end;
- ranges.erase(I);
- I = LR+1;
+ if (I->start == S->end && I->valno == V2) {
+ S->end = I->end;
+ segments.erase(I);
+ I = S+1;
}
}
}
@@ -584,22 +577,21 @@ unsigned LiveInterval::getSize() const {
return Sum;
}
-raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
- return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) {
+ return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ")";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void LiveRange::dump() const {
+void LiveRange::Segment::dump() const {
dbgs() << *this << "\n";
}
#endif
-void LiveInterval::print(raw_ostream &OS) const {
+void LiveRange::print(raw_ostream &OS) const {
if (empty())
OS << "EMPTY";
else {
- for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
- E = ranges.end(); I != E; ++I) {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
OS << *I;
assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
}
@@ -625,19 +617,29 @@ void LiveInterval::print(raw_ostream &OS) const {
}
}
+void LiveInterval::print(raw_ostream &OS) const {
+ OS << PrintReg(reg) << ' ';
+ super::print(OS);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveRange::dump() const {
+ dbgs() << *this << "\n";
+}
+
void LiveInterval::dump() const {
dbgs() << *this << "\n";
}
#endif
#ifndef NDEBUG
-void LiveInterval::verify() const {
+void LiveRange::verify() const {
for (const_iterator I = begin(), E = end(); I != E; ++I) {
assert(I->start.isValid());
assert(I->end.isValid());
assert(I->start < I->end);
assert(I->valno != 0);
+ assert(I->valno->id < valnos.size());
assert(I->valno == valnos[I->valno->id]);
if (llvm::next(I) != E) {
assert(I->end <= llvm::next(I)->start);
@@ -649,10 +651,6 @@ void LiveInterval::verify() const {
#endif
-void LiveRange::print(raw_ostream &os) const {
- os << *this;
-}
-
//===----------------------------------------------------------------------===//
// LiveRangeUpdater class
//===----------------------------------------------------------------------===//
@@ -665,11 +663,11 @@ void LiveRange::print(raw_ostream &os) const {
//
// Otherwise, segments are kept in three separate areas:
//
-// 1. [begin; WriteI) at the front of LI.
-// 2. [ReadI; end) at the back of LI.
+// 1. [begin; WriteI) at the front of LR.
+// 2. [ReadI; end) at the back of LR.
// 3. Spills.
//
-// - LI.begin() <= WriteI <= ReadI <= LI.end().
+// - LR.begin() <= WriteI <= ReadI <= LR.end().
// - Segments in all three areas are fully ordered and coalesced.
// - Segments in area 1 precede and can't coalesce with segments in area 2.
// - Segments in Spills precede and can't coalesce with segments in area 2.
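[Editorial sketch, not part of the patch] A minimal usage example of LiveRangeUpdater, essentially the body of MergeSegmentsInAsValue above; like that function, it assumes the updater's destructor flushes pending segments, and it assumes the same header as the earlier sketch.

// Copy every segment of Src into Dst under one value number. Feeding the
// updater in order of increasing start index keeps it in its fast appending
// mode (add() only flushes when a start index moves backwards).
void copySegmentsAs(LiveRange &Dst, const LiveRange &Src, VNInfo *DstValNo) {
  LiveRangeUpdater Updater(&Dst);
  for (LiveRange::const_iterator I = Src.begin(), E = Src.end(); I != E; ++I)
    Updater.add(I->start, I->end, DstValNo);
}  // Updater is destroyed here and any pending segments land in Dst.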
@@ -684,23 +682,23 @@ void LiveRange::print(raw_ostream &os) const {
void LiveRangeUpdater::print(raw_ostream &OS) const {
if (!isDirty()) {
- if (LI)
- OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n';
+ if (LR)
+ OS << "Clean updater: " << *LR << '\n';
else
OS << "Null updater.\n";
return;
}
- assert(LI && "Can't have null LI in dirty updater.");
- OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI)
+ assert(LR && "Can't have null LR in dirty updater.");
+ OS << " updater with gap = " << (ReadI - WriteI)
<< ", last start = " << LastStart
<< ":\n Area 1:";
- for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I)
+ for (LiveRange::const_iterator I = LR->begin(); I != WriteI; ++I)
OS << ' ' << *I;
OS << "\n Spills:";
for (unsigned I = 0, E = Spills.size(); I != E; ++I)
OS << ' ' << Spills[I];
OS << "\n Area 2:";
- for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I)
+ for (LiveRange::const_iterator I = ReadI, E = LR->end(); I != E; ++I)
OS << ' ' << *I;
OS << '\n';
}
@@ -711,8 +709,9 @@ void LiveRangeUpdater::dump() const
}
// Determine if A and B should be coalesced.
-static inline bool coalescable(const LiveRange &A, const LiveRange &B) {
- assert(A.start <= B.start && "Unordered live ranges.");
+static inline bool coalescable(const LiveRange::Segment &A,
+ const LiveRange::Segment &B) {
+ assert(A.start <= B.start && "Unordered live segments.");
if (A.end == B.start)
return A.valno == B.valno;
if (A.end < B.start)
@@ -721,8 +720,8 @@ static inline bool coalescable(const LiveRange &A, const LiveRange &B) {
return true;
}
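A few concrete outcomes, with hypothetical slot indexes in the [start,end:valno-id) notation:

//   coalescable([4,8:0), [8,12:0))   -> true   (touching, same value number)
//   coalescable([4,8:0), [8,12:1))   -> false  (touching, different values)
//   coalescable([4,8:0), [10,12:0))  -> false  (gap between the segments)
//   coalescable([4,8:0), [6,12:0))   -> true   (overlap falls through to the
//                                               final return above)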
-void LiveRangeUpdater::add(LiveRange Seg) {
- assert(LI && "Cannot add to a null destination");
+void LiveRangeUpdater::add(LiveRange::Segment Seg) {
+ assert(LR && "Cannot add to a null destination");
// Flush the state if Start moves backwards.
if (!LastStart.isValid() || LastStart > Seg.start) {
@@ -730,21 +729,21 @@ void LiveRangeUpdater::add(LiveRange Seg) {
flush();
// This brings us to an uninitialized state. Reinitialize.
assert(Spills.empty() && "Leftover spilled segments");
- WriteI = ReadI = LI->begin();
+ WriteI = ReadI = LR->begin();
}
// Remember start for next time.
LastStart = Seg.start;
// Advance ReadI until it ends after Seg.start.
- LiveInterval::iterator E = LI->end();
+ LiveRange::iterator E = LR->end();
if (ReadI != E && ReadI->end <= Seg.start) {
// First try to close the gap between WriteI and ReadI with spills.
if (ReadI != WriteI)
mergeSpills();
// Then advance ReadI.
if (ReadI == WriteI)
- ReadI = WriteI = LI->find(Seg.start);
+ ReadI = WriteI = LR->find(Seg.start);
else
while (ReadI != E && ReadI->end <= Seg.start)
*WriteI++ = *ReadI++;
@@ -777,7 +776,7 @@ void LiveRangeUpdater::add(LiveRange Seg) {
}
// Try coalescing Seg into WriteI[-1].
- if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) {
+ if (WriteI != LR->begin() && coalescable(WriteI[-1], Seg)) {
WriteI[-1].end = std::max(WriteI[-1].end, Seg.end);
return;
}
@@ -788,10 +787,10 @@ void LiveRangeUpdater::add(LiveRange Seg) {
return;
}
- // Finally, append to LI or Spills.
+ // Finally, append to LR or Spills.
if (WriteI == E) {
- LI->ranges.push_back(Seg);
- WriteI = ReadI = LI->ranges.end();
+ LR->segments.push_back(Seg);
+ WriteI = ReadI = LR->end();
} else
Spills.push_back(Seg);
}
@@ -802,10 +801,10 @@ void LiveRangeUpdater::mergeSpills() {
// Perform a backwards merge of Spills and [SpillI;WriteI).
size_t GapSize = ReadI - WriteI;
size_t NumMoved = std::min(Spills.size(), GapSize);
- LiveInterval::iterator Src = WriteI;
- LiveInterval::iterator Dst = Src + NumMoved;
- LiveInterval::iterator SpillSrc = Spills.end();
- LiveInterval::iterator B = LI->begin();
+ LiveRange::iterator Src = WriteI;
+ LiveRange::iterator Dst = Src + NumMoved;
+ LiveRange::iterator SpillSrc = Spills.end();
+ LiveRange::iterator B = LR->begin();
// This is the new WriteI position after merging spills.
WriteI = Dst;
@@ -827,12 +826,12 @@ void LiveRangeUpdater::flush() {
// Clear the dirty state.
LastStart = SlotIndex();
- assert(LI && "Cannot add to a null destination");
+ assert(LR && "Cannot add to a null destination");
// Nothing to merge?
if (Spills.empty()) {
- LI->ranges.erase(WriteI, ReadI);
- LI->verify();
+ LR->segments.erase(WriteI, ReadI);
+ LR->verify();
return;
}
@@ -840,17 +839,17 @@ void LiveRangeUpdater::flush() {
size_t GapSize = ReadI - WriteI;
if (GapSize < Spills.size()) {
// The gap is too small. Make some room.
- size_t WritePos = WriteI - LI->begin();
- LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange());
+ size_t WritePos = WriteI - LR->begin();
+ LR->segments.insert(ReadI, Spills.size() - GapSize, LiveRange::Segment());
// This also invalidated ReadI, but it is recomputed below.
- WriteI = LI->ranges.begin() + WritePos;
+ WriteI = LR->begin() + WritePos;
} else {
// Shrink the gap if necessary.
- LI->ranges.erase(WriteI + Spills.size(), ReadI);
+ LR->segments.erase(WriteI + Spills.size(), ReadI);
}
ReadI = WriteI + Spills.size();
mergeSpills();
- LI->verify();
+ LR->verify();
}
unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
@@ -918,7 +917,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
Idx = LIS.getSlotIndexes()->getIndexBefore(MI);
else
Idx = LIS.getInstructionIndex(MI);
- LiveRangeQuery LRQ(LI, Idx);
+ LiveQueryResult LRQ = LI.Query(Idx);
const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
// In the case of an <undef> use that isn't tied to any def, VNI will be
// NULL. If the use is tied to a def, VNI will be the defined value.
@@ -935,11 +934,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
if (unsigned eq = EqClass[I->valno->id]) {
assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
"New intervals should be empty");
- LIV[eq]->ranges.push_back(*I);
+ LIV[eq]->segments.push_back(*I);
} else
*J++ = *I;
}
- LI.ranges.erase(J, E);
+ LI.segments.erase(J, E);
// Transfer VNInfos to their new owners and renumber them.
unsigned j = 0, e = LI.getNumValNums();
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 3680943..e1c3217 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -95,15 +95,15 @@ void LiveIntervals::releaseMemory() {
RegMaskBits.clear();
RegMaskBlocks.clear();
- for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
- delete RegUnitIntervals[i];
- RegUnitIntervals.clear();
+ for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
+ delete RegUnitRanges[i];
+ RegUnitRanges.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
}
-/// runOnMachineFunction - Register allocate the whole function
+/// runOnMachineFunction - Calculates LiveIntervals.
///
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
@@ -139,15 +139,15 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
// Dump the regunits.
- for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
- if (LiveInterval *LI = RegUnitIntervals[i])
- OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n';
+ for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
+ if (LiveRange *LR = RegUnitRanges[i])
+ OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n';
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (hasInterval(Reg))
- OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
+ OS << getInterval(Reg) << '\n';
}
OS << "RegMasks:";
@@ -170,16 +170,17 @@ void LiveIntervals::dumpInstrs() const {
#endif
LiveInterval* LiveIntervals::createInterval(unsigned reg) {
- float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ?
+ llvm::huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
}
/// computeVirtRegInterval - Compute the live interval of a virtual register,
/// based on defs and uses.
-void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) {
+void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
- assert(LI->empty() && "Should only compute empty intervals.");
+ assert(LI.empty() && "Should only compute empty intervals.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
LRCalc->createDeadDefs(LI);
LRCalc->extendToUses(LI);
@@ -190,9 +191,7 @@ void LiveIntervals::computeVirtRegs() {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
- LiveInterval *LI = createInterval(Reg);
- VirtRegIntervals[Reg] = LI;
- computeVirtRegInterval(LI);
+ createAndComputeVirtRegInterval(Reg);
}
}
@@ -229,12 +228,10 @@ void LiveIntervals::computeRegMasks() {
// interference.
//
-/// computeRegUnitInterval - Compute the live interval of a register unit, based
-/// on the uses and defs of aliasing registers. The interval should be empty,
+/// computeRegUnitRange - Compute the live range of a register unit, based
+/// on the uses and defs of aliasing registers. The range should be empty,
/// or contain only dead phi-defs from ABI blocks.
-void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
- unsigned Unit = LI->reg;
-
+void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
@@ -247,18 +244,18 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
Supers.isValid(); ++Supers) {
if (!MRI->reg_empty(*Supers))
- LRCalc->createDeadDefs(LI, *Supers);
+ LRCalc->createDeadDefs(LR, *Supers);
}
}
- // Now extend LI to reach all uses.
+ // Now extend LR to reach all uses.
// Ignore uses of reserved registers. We only track defs of those.
for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
Supers.isValid(); ++Supers) {
unsigned Reg = *Supers;
if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
- LRCalc->extendToUses(LI, Reg);
+ LRCalc->extendToUses(LR, Reg);
}
}
}
@@ -269,11 +266,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
/// without a corresponding def when entering the entry block or a landing pad.
///
void LiveIntervals::computeLiveInRegUnits() {
- RegUnitIntervals.resize(TRI->getNumRegUnits());
+ RegUnitRanges.resize(TRI->getNumRegUnits());
DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
- // Keep track of the intervals allocated.
- SmallVector<LiveInterval*, 8> NewIntvs;
+ // Keep track of the register units whose live ranges were newly allocated.
+ SmallVector<unsigned, 8> NewRanges;
// Check all basic blocks for live-ins.
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
@@ -291,23 +288,25 @@ void LiveIntervals::computeLiveInRegUnits() {
LIE = MBB->livein_end(); LII != LIE; ++LII) {
for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) {
unsigned Unit = *Units;
- LiveInterval *Intv = RegUnitIntervals[Unit];
- if (!Intv) {
- Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF);
- NewIntvs.push_back(Intv);
+ LiveRange *LR = RegUnitRanges[Unit];
+ if (!LR) {
+ LR = RegUnitRanges[Unit] = new LiveRange();
+ NewRanges.push_back(Unit);
}
- VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator());
+ VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
(void)VNI;
DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id);
}
}
DEBUG(dbgs() << '\n');
}
- DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n");
+ DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");
- // Compute the 'normal' part of the intervals.
- for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i)
- computeRegUnitInterval(NewIntvs[i]);
+ // Compute the 'normal' part of the ranges.
+ for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) {
+ unsigned Unit = NewRanges[i];
+ computeRegUnitRange(*RegUnitRanges[Unit], Unit);
+ }
}
@@ -331,7 +330,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
continue;
SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
- LiveRangeQuery LRQ(*li, Idx);
+ LiveQueryResult LRQ = li->Query(Idx);
VNInfo *VNI = LRQ.valueIn();
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
@@ -350,14 +349,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
WorkList.push_back(std::make_pair(Idx, VNI));
}
- // Create a new live interval with only minimal live segments per def.
- LiveInterval NewLI(li->reg, 0);
+ // Create a new live range with only minimal live segments per def.
+ LiveRange NewLR;
for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
I != E; ++I) {
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI));
+ NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI));
}
// Keep track of the PHIs that are in use.
@@ -372,7 +371,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SlotIndex BlockStart = getMBBStartIdx(MBB);
// Extend the live range for VNI to be live at Idx.
- if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
+ if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) {
(void)ExtVNI;
assert(ExtVNI == VNI && "Unexpected existing value number");
// Is this a PHIDef we haven't seen before?
@@ -393,7 +392,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// VNI is live-in to MBB.
DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
- NewLI.addRange(LiveRange(BlockStart, Idx, VNI));
+ NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
// Make sure VNI is live-out from the predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
@@ -414,14 +413,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
- assert(LII != NewLI.end() && "Missing live range for PHI");
- if (LII->end != VNI->def.getDeadSlot())
+ LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def);
+ assert(LRI != NewLR.end() && "Missing segment for PHI");
+ if (LRI->end != VNI->def.getDeadSlot())
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
VNI->markUnused();
- NewLI.removeRange(*LII);
+ NewLR.removeSegment(LRI->start, LRI->end);
DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
CanSeparate = true;
} else {
@@ -436,23 +435,23 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
}
}
- // Move the trimmed ranges back.
- li->ranges.swap(NewLI.ranges);
+ // Move the trimmed segments back.
+ li->segments.swap(NewLR.segments);
DEBUG(dbgs() << "Shrunk: " << *li << '\n');
return CanSeparate;
}
-void LiveIntervals::extendToIndices(LiveInterval *LI,
+void LiveIntervals::extendToIndices(LiveRange &LR,
ArrayRef<SlotIndex> Indices) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
for (unsigned i = 0, e = Indices.size(); i != e; ++i)
- LRCalc->extend(LI, Indices[i]);
+ LRCalc->extend(LR, Indices[i]);
}
void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
SmallVectorImpl<SlotIndex> *EndPoints) {
- LiveRangeQuery LRQ(*LI, Kill);
+ LiveQueryResult LRQ = LI->Query(Kill);
VNInfo *VNI = LRQ.valueOut();
if (!VNI)
return;
@@ -463,13 +462,13 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
// If VNI isn't live out from KillMBB, the value is trivially pruned.
if (LRQ.endPoint() < MBBEnd) {
- LI->removeRange(Kill, LRQ.endPoint());
+ LI->removeSegment(Kill, LRQ.endPoint());
if (EndPoints) EndPoints->push_back(LRQ.endPoint());
return;
}
// VNI is live out of KillMBB.
- LI->removeRange(Kill, MBBEnd);
+ LI->removeSegment(Kill, MBBEnd);
if (EndPoints) EndPoints->push_back(MBBEnd);
// Find all blocks that are reachable from KillMBB without leaving VNI's live
@@ -487,23 +486,23 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
// Check if VNI is live in to MBB.
tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
- LiveRangeQuery LRQ(*LI, MBBStart);
+ LiveQueryResult LRQ = LI->Query(MBBStart);
if (LRQ.valueIn() != VNI) {
- // This block isn't part of the VNI live range. Prune the search.
+ // This block isn't part of the VNI segment. Prune the search.
I.skipChildren();
continue;
}
// Prune the search if VNI is killed in MBB.
if (LRQ.endPoint() < MBBEnd) {
- LI->removeRange(MBBStart, LRQ.endPoint());
+ LI->removeSegment(MBBStart, LRQ.endPoint());
if (EndPoints) EndPoints->push_back(LRQ.endPoint());
I.skipChildren();
continue;
}
// VNI is live through MBB.
- LI->removeRange(MBBStart, MBBEnd);
+ LI->removeSegment(MBBStart, MBBEnd);
if (EndPoints) EndPoints->push_back(MBBEnd);
++I;
}
@@ -516,7 +515,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Keep track of regunit ranges.
- SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU;
+ SmallVector<std::pair<LiveRange*, LiveRange::iterator>, 8> RU;
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
@@ -531,13 +530,14 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
RU.clear();
for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
++Units) {
- LiveInterval *RUInt = &getRegUnit(*Units);
- if (RUInt->empty())
+ LiveRange &RURanges = getRegUnit(*Units);
+ if (RURanges.empty())
continue;
- RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end)));
+ RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end)));
}
- // Every instruction that kills Reg corresponds to a live range end point.
+ // Every instruction that kills Reg corresponds to a segment end point.
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
++RI) {
// A block index indicates an MBB edge.
@@ -547,7 +547,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
if (!MI)
continue;
- // Check if any of the reguints are live beyond the end of RI. That could
+ // Check if any of the regunits are live beyond the end of RI. That could
// happen when a physreg is defined as a copy of a virtreg:
//
// %EAX = COPY %vreg5
@@ -557,12 +557,12 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
bool CancelKill = false;
for (unsigned u = 0, e = RU.size(); u != e; ++u) {
- LiveInterval *RInt = RU[u].first;
- LiveInterval::iterator &I = RU[u].second;
- if (I == RInt->end())
+ LiveRange &RRanges = *RU[u].first;
+ LiveRange::iterator &I = RU[u].second;
+ if (I == RRanges.end())
continue;
- I = RInt->advanceTo(I, RI->end);
- if (I == RInt->end() || I->start >= RI->end)
+ I = RRanges.advanceTo(I, RI->end);
+ if (I == RRanges.end() || I->start >= RI->end)
continue;
// I is overlapping RI.
CancelKill = true;
@@ -625,18 +625,18 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) {
return (isDef + isUse) * (freq.getFrequency() * Scale);
}
-LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
- MachineInstr* startInst) {
- LiveInterval& Interval = getOrCreateInterval(reg);
+LiveRange::Segment
+LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst) {
+ LiveInterval& Interval = createEmptyInterval(reg);
VNInfo* VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getVNInfoAllocator());
- LiveRange LR(
+ LiveRange::Segment S(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getMBBEndIdx(startInst->getParent()), VN);
- Interval.addRange(LR);
+ Interval.addSegment(S);
- return LR;
+ return S;
}
@@ -711,7 +711,7 @@ private:
const TargetRegisterInfo& TRI;
SlotIndex OldIdx;
SlotIndex NewIdx;
- SmallPtrSet<LiveInterval*, 8> Updated;
+ SmallPtrSet<LiveRange*, 8> Updated;
bool UpdateFlags;
public:
@@ -725,7 +725,7 @@ public:
// physregs, even those that aren't needed for regalloc, in order to update
// kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
// flags, and postRA passes will use a live register utility instead.
- LiveInterval *getRegUnitLI(unsigned Unit) {
+ LiveRange *getRegUnitLI(unsigned Unit) {
if (UpdateFlags)
return &LIS.getRegUnit(Unit);
return LIS.getCachedRegUnit(Unit);
@@ -750,15 +750,16 @@ public:
if (!Reg)
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- updateRange(LIS.getInterval(Reg));
+ LiveInterval &LI = LIS.getInterval(Reg);
+ updateRange(LI, Reg);
continue;
}
// For physregs, only update the regunits that actually have a
// precomputed live range.
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- if (LiveInterval *LI = getRegUnitLI(*Units))
- updateRange(*LI);
+ if (LiveRange *LR = getRegUnitLI(*Units))
+ updateRange(*LR, *Units);
}
if (hasRegMask)
updateRegMaskSlots();
@@ -767,26 +768,26 @@ public:
private:
/// Update a single live range, assuming an instruction has been moved from
/// OldIdx to NewIdx.
- void updateRange(LiveInterval &LI) {
- if (!Updated.insert(&LI))
+ void updateRange(LiveRange &LR, unsigned Reg) {
+ if (!Updated.insert(&LR))
return;
DEBUG({
dbgs() << " ";
- if (TargetRegisterInfo::isVirtualRegister(LI.reg))
- dbgs() << PrintReg(LI.reg);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ dbgs() << PrintReg(Reg);
else
- dbgs() << PrintRegUnit(LI.reg, &TRI);
- dbgs() << ":\t" << LI << '\n';
+ dbgs() << PrintRegUnit(Reg, &TRI);
+ dbgs() << ":\t" << LR << '\n';
});
if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
- handleMoveDown(LI);
+ handleMoveDown(LR);
else
- handleMoveUp(LI);
- DEBUG(dbgs() << " -->\t" << LI << '\n');
- LI.verify();
+ handleMoveUp(LR, Reg);
+ DEBUG(dbgs() << " -->\t" << LR << '\n');
+ LR.verify();
}
- /// Update LI to reflect an instruction has been moved downwards from OldIdx
+ /// Update LR to reflect an instruction has been moved downwards from OldIdx
/// to NewIdx.
///
/// 1. Live def at OldIdx:
@@ -800,17 +801,17 @@ private:
/// Move def to NewIdx, possibly across another live value.
///
/// 4. Def at OldIdx AND at NewIdx:
- /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+ /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx.
/// (Happens when bundling multiple defs together).
///
/// 5. Value read at OldIdx, killed before NewIdx:
/// Extend kill to NewIdx.
///
- void handleMoveDown(LiveInterval &LI) {
+ void handleMoveDown(LiveRange &LR) {
// First look for a kill at OldIdx.
- LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
- LiveInterval::iterator E = LI.end();
- // Is LI even live at OldIdx?
+ LiveRange::iterator I = LR.find(OldIdx.getBaseIndex());
+ LiveRange::iterator E = LR.end();
+ // Is LR even live at OldIdx?
if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
return;
@@ -827,7 +828,7 @@ private:
for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
if (MO->isReg() && MO->isUse())
MO->setIsKill(false);
- // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by
+ // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by
// overlapping ranges. Case 5 above.
I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
// If this was a kill, there may also be a def. Otherwise we're done.
@@ -856,24 +857,25 @@ private:
assert((I->end == OldIdx.getDeadSlot() ||
SlotIndex::isSameInstr(I->end, NewIdx)) &&
"Cannot move def below kill");
- LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+ LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot());
if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
// There is an existing def at NewIdx, case 4 above. The def at OldIdx is
// coalesced into that value.
assert(NewI->valno != DefVNI && "Multiple defs of value?");
- LI.removeValNo(DefVNI);
+ LR.removeValNo(DefVNI);
return;
}
// There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
- // If the def at OldIdx was dead, we allow it to be moved across other LI
+ // If the def at OldIdx was dead, we allow it to be moved across other LR
// values. The new range should be placed immediately before NewI, move any
// intermediate ranges up.
assert(NewI != I && "Inconsistent iterators");
std::copy(llvm::next(I), NewI, I);
- *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
+ *llvm::prior(NewI)
+ = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- /// Update LI to reflect an instruction has been moved upwards from OldIdx
+ /// Update LR to reflect an instruction has been moved upwards from OldIdx
/// to NewIdx.
///
/// 1. Live def at OldIdx:
@@ -893,11 +895,11 @@ private:
/// Hoist kill to NewIdx, then scan for last kill between NewIdx and
/// OldIdx.
///
- void handleMoveUp(LiveInterval &LI) {
+ void handleMoveUp(LiveRange &LR, unsigned Reg) {
// First look for a kill at OldIdx.
- LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
- LiveInterval::iterator E = LI.end();
- // Is LI even live at OldIdx?
+ LiveRange::iterator I = LR.find(OldIdx.getBaseIndex());
+ LiveRange::iterator E = LR.end();
+ // Is LR even live at OldIdx?
if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
return;
@@ -914,7 +916,7 @@ private:
if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
// No def, search for the new kill.
// This can never be an early clobber kill since there is no def.
- llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+ llvm::prior(I)->end = findLastUseBefore(Reg).getRegSlot();
return;
}
}
@@ -926,18 +928,18 @@ private:
DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
// Check for an existing def at NewIdx.
- LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+ LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot());
if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
assert(NewI->valno != DefVNI && "Same value defined more than once?");
// There is an existing def at NewIdx.
if (I->end.isDead()) {
// Case 3: Remove the dead def at OldIdx.
- LI.removeValNo(DefVNI);
+ LR.removeValNo(DefVNI);
return;
}
// Case 4: Replace def at NewIdx with live def at OldIdx.
I->start = DefVNI->def;
- LI.removeValNo(NewI->valno);
+ LR.removeValNo(NewI->valno);
return;
}
@@ -948,10 +950,10 @@ private:
return;
}
- // DefVNI is a dead def. It may have been moved across other values in LI,
+ // DefVNI is a dead def. It may have been moved across other values in LR,
// so move I up to NewI. Slide [NewI;I) down one position.
std::copy_backward(NewI, I, llvm::next(I));
- *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
+ *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
void updateRegMaskSlots() {
@@ -1074,8 +1076,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
if (MOI->isReg() &&
TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
!hasInterval(MOI->getReg())) {
- LiveInterval &LI = getOrCreateInterval(MOI->getReg());
- computeVirtRegInterval(&LI);
+ createAndComputeVirtRegInterval(MOI->getReg());
}
}
}
@@ -1122,9 +1123,9 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
if (LII != LI.begin())
prevStart = llvm::prior(LII)->start;
- // FIXME: This could be more efficient if there was a removeRange
- // method that returned an iterator.
- LI.removeRange(*LII, true);
+ // FIXME: This could be more efficient if there was a
+ // removeSegment method that returned an iterator.
+ LI.removeSegment(*LII, true);
if (prevStart.isValid())
LII = LI.find(prevStart);
else
@@ -1143,13 +1144,14 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
if (!lastUseIdx.isValid()) {
VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
VNInfoAllocator);
- LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI);
- LII = LI.addRange(LR);
+ LiveRange::Segment S(instrIdx.getRegSlot(),
+ instrIdx.getDeadSlot(), VNI);
+ LII = LI.addSegment(S);
} else if (LII->start != instrIdx.getRegSlot()) {
VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
VNInfoAllocator);
- LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI);
- LII = LI.addRange(LR);
+ LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI);
+ LII = LI.addSegment(S);
}
if (MO.getSubReg() && !MO.isUndef())
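[Editorial sketch, not part of the patch] A recurring change in the LiveIntervalAnalysis hunks above is replacing LiveRangeQuery construction with the LiveQueryResult returned by Query(). A small illustration using only the accessors this patch actually exercises (valueIn, valueOut, valueDefined, endPoint, isKill); the helper name is hypothetical and llvm/CodeGen/LiveInterval.h is assumed to provide the types.

#include "llvm/CodeGen/LiveInterval.h"
using namespace llvm;

// Does LI carry a value into the instruction at Idx? Mirrors the pattern
// "LiveQueryResult LRQ = LI.Query(Idx); ... LRQ.valueIn()" used above in
// shrinkToUses, pruneValue and ConnectedVNInfoEqClasses::Distribute.
static bool hasValueLiveIn(LiveInterval &LI, SlotIndex Idx) {
  LiveQueryResult LRQ = LI.Query(Idx);
  return LRQ.valueIn() != 0;
}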
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index dede490..ae086bc 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -36,11 +36,11 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
}
-void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
+void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
assert(MRI && Indexes && "call reset() first");
// Visit all def operands. If the same instruction has multiple defs of Reg,
- // LI->createDeadDef() will deduplicate.
+ // LR.createDeadDef() will deduplicate.
for (MachineRegisterInfo::def_iterator
I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) {
const MachineInstr *MI = &*I;
@@ -54,13 +54,13 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
Idx = Indexes->getInstructionIndex(MI)
.getRegSlot(I.getOperand().isEarlyClobber());
- // Create the def in LI. This may find an existing def.
- LI->createDeadDef(Idx, *Alloc);
+ // Create the def in LR. This may find an existing def.
+ LR.createDeadDef(Idx, *Alloc);
}
}
-void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) {
assert(MRI && Indexes && "call reset() first");
// Visit all operands that read Reg. This may include partial defs.
@@ -99,7 +99,7 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
Idx = Idx.getRegSlot(true);
}
}
- extend(LI, Idx, Reg);
+ extend(LR, Idx, Reg);
}
}
@@ -125,17 +125,14 @@ void LiveRangeCalc::updateLiveIns() {
assert(Seen.test(MBB->getNumber()));
LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0);
}
- Updater.setDest(I->LI);
+ Updater.setDest(&I->LR);
Updater.add(Start, End, I->Value);
}
LiveIn.clear();
}
-void LiveRangeCalc::extend(LiveInterval *LI,
- SlotIndex Kill,
- unsigned PhysReg) {
- assert(LI && "Missing live range");
+void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) {
assert(Kill.isValid() && "Invalid SlotIndex");
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
@@ -144,14 +141,14 @@ void LiveRangeCalc::extend(LiveInterval *LI,
assert(KillMBB && "No MBB at Kill");
// Is there a def in the same MBB we can extend?
- if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
+ if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
return;
// Find the single reaching def, or determine if Kill is jointly dominated by
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- if (findReachingDefs(LI, KillMBB, Kill, PhysReg))
+ if (findReachingDefs(LR, *KillMBB, Kill, PhysReg))
return;
// When there were multiple different values, we may need new PHIs.
@@ -170,13 +167,11 @@ void LiveRangeCalc::calculateValues() {
}
-bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
- MachineBasicBlock *KillMBB,
- SlotIndex Kill,
- unsigned PhysReg) {
- unsigned KillMBBNum = KillMBB->getNumber();
+bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
+ SlotIndex Kill, unsigned PhysReg) {
+ unsigned KillMBBNum = KillMBB.getNumber();
- // Block numbers where LI should be live-in.
+ // Block numbers where LR should be live-in.
SmallVector<unsigned, 16> WorkList(1, KillMBBNum);
// Remember if we have seen more than one value.
@@ -203,7 +198,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
#endif
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
+ PE = MBB->pred_end(); PI != PE; ++PI) {
MachineBasicBlock *Pred = *PI;
// Is this a known live-out block?
@@ -221,7 +216,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// First time we see Pred. Try to determine the live-out value, but set
// it as null if Pred is live-through with an unknown value.
- VNInfo *VNI = LI->extendInBlock(Start, End);
+ VNInfo *VNI = LR.extendInBlock(Start, End);
setLiveOutValue(Pred, VNI);
if (VNI) {
if (TheVNI && TheVNI != VNI)
@@ -231,7 +226,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
}
// No, we need a live-in value for Pred as well
- if (Pred != KillMBB)
+ if (Pred != &KillMBB)
WorkList.push_back(Pred->getNumber());
else
// Loopback to KillMBB, so value is really live through.
@@ -248,9 +243,9 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// If a unique reaching def was found, blit in the live ranges immediately.
if (UniqueVNI) {
- LiveRangeUpdater Updater(LI);
- for (SmallVectorImpl<unsigned>::const_iterator
- I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ LiveRangeUpdater Updater(&LR);
+ for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(),
+ E = WorkList.end(); I != E; ++I) {
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(*I);
// Trim the live range in KillMBB.
@@ -270,8 +265,8 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
for (SmallVectorImpl<unsigned>::const_iterator
I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
- addLiveInBlock(LI, DomTree->getNode(MBB));
- if (MBB == KillMBB)
+ addLiveInBlock(LR, DomTree->getNode(MBB));
+ if (MBB == &KillMBB)
LiveIn.back().Kill = Kill;
}
@@ -348,16 +343,17 @@ void LiveRangeCalc::updateSSA() {
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
- VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
+ LiveRange &LR = I->LR;
+ VNInfo *VNI = LR.getNextValue(Start, *Alloc);
I->Value = VNI;
// This block is done, we know the final value.
I->DomNode = 0;
// Add liveness since updateLiveIns now skips this node.
if (I->Kill.isValid())
- I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ LR.addSegment(LiveInterval::Segment(Start, I->Kill, VNI));
else {
- I->LI->addRange(LiveRange(Start, End, VNI));
+ LR.addSegment(LiveInterval::Segment(Start, End, VNI));
LOP = LiveOutPair(VNI, Node);
}
} else if (IDomValue.first) {
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 57cab7b..a3a3fbb 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -75,9 +75,9 @@ class LiveRangeCalc {
/// LiveInBlock - Information about a basic block where a live range is known
/// to be live-in, but the value has not yet been determined.
struct LiveInBlock {
- // LI - The live range that is live-in to this block. The algorithms can
+ // The live range set that is live-in to this block. The algorithms can
// handle multiple non-overlapping live ranges simultaneously.
- LiveInterval *LI;
+ LiveRange &LR;
// DomNode - Dominator tree node for the block.
// Cleared when the final value has been determined and LI has been updated.
@@ -91,8 +91,8 @@ class LiveRangeCalc {
// Live-in value filled in by updateSSA once it is known.
VNInfo *Value;
- LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill)
- : LI(li), DomNode(node), Kill(kill), Value(0) {}
+ LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
+ : LR(LR), DomNode(node), Kill(kill), Value(0) {}
};
/// LiveIn - Work list of blocks where the live-in value has yet to be
@@ -111,10 +111,8 @@ class LiveRangeCalc {
/// are added to the LiveIn array, and the function returns false.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- bool findReachingDefs(LiveInterval *LI,
- MachineBasicBlock *KillMBB,
- SlotIndex Kill,
- unsigned PhysReg);
+ bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
+ SlotIndex Kill, unsigned PhysReg);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
@@ -146,10 +144,6 @@ public:
MachineDominatorTree*,
VNInfo::Allocator*);
- /// calculate - Calculate the live range of a virtual register from its defs
- /// and uses. LI must be empty with no values.
- void calculate(LiveInterval *LI);
-
//===--------------------------------------------------------------------===//
// Mid-level interface.
//===--------------------------------------------------------------------===//
@@ -165,27 +159,27 @@ public:
/// single existing value, Alloc may be null.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0);
+ void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0);
/// createDeadDefs - Create a dead def in LR for every def operand of Reg.
/// Each instruction defining Reg gets a new VNInfo with a corresponding
/// minimal live range.
- void createDeadDefs(LiveInterval *LI, unsigned Reg);
+ void createDeadDefs(LiveRange &LR, unsigned Reg);
/// createDeadDefs - Create a dead def in LI for every def of LI->reg.
- void createDeadDefs(LiveInterval *LI) {
- createDeadDefs(LI, LI->reg);
+ void createDeadDefs(LiveInterval &LI) {
+ createDeadDefs(LI, LI.reg);
}
/// extendToUses - Extend LR to reach all uses of Reg.
///
/// All uses must be jointly dominated by existing liveness. PHI-defs are
/// inserted as needed to preserve SSA form.
- void extendToUses(LiveInterval *LI, unsigned Reg);
+ void extendToUses(LiveRange &LR, unsigned Reg);
/// extendToUses - Extend the live range of LI to reach all uses of LI->reg.
- void extendToUses(LiveInterval *LI) {
- extendToUses(LI, LI->reg);
+ void extendToUses(LiveInterval &LI) {
+ extendToUses(LI, LI.reg);
}
//===--------------------------------------------------------------------===//
@@ -216,15 +210,15 @@ public:
/// function can only be called once per basic block. Once the live-in value
/// has been determined, calculateValues() will add liveness to LI.
///
- /// @param LI The live range that is live-in to the block.
+ /// @param LR The live range that is live-in to the block.
/// @param DomNode The domtree node for the block.
/// @param Kill Index in block where LR is killed. If the value is
/// live-through, set Kill = SlotIndex() and also call
/// setLiveOutValue(MBB, 0).
- void addLiveInBlock(LiveInterval *LI,
+ void addLiveInBlock(LiveRange &LR,
MachineDomTreeNode *DomNode,
SlotIndex Kill = SlotIndex()) {
- LiveIn.push_back(LiveInBlock(LI, DomNode, Kill));
+ LiveIn.push_back(LiveInBlock(LR, DomNode, Kill));
}
/// calculateValues - Calculate the value that will be live-in to each block
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 792ef54..cb70c43 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -30,17 +30,23 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
void LiveRangeEdit::Delegate::anchor() { }
-LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
+LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) {
unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
if (VRM) {
- VRM->grow();
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
- LiveInterval &LI = LIS.getOrCreateInterval(VReg);
- NewRegs.push_back(&LI);
+ LiveInterval &LI = LIS.createEmptyInterval(VReg);
return LI;
}
+unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
+ unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ if (VRM) {
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
+ return VReg;
+}
+
bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
const MachineInstr *DefMI,
AliasAnalysis *aa) {
@@ -256,9 +262,9 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
else if (MOI->isDef()) {
for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo());
Units.isValid(); ++Units) {
- if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) {
- if (VNInfo *VNI = LI->getVNInfoAt(Idx))
- LI->removeValNo(VNI);
+ if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) {
+ if (VNInfo *VNI = LR->getVNInfoAt(Idx))
+ LR->removeValNo(VNI);
}
}
}
@@ -272,7 +278,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Always shrink COPY uses that probably come from live range splitting.
if (MI->readsVirtualRegister(Reg) &&
(MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
- LI.killedAt(Idx)))
+ LI.Query(Idx).isKill()))
ToShrink.insert(&LI);
// Remove defined value.
@@ -360,7 +366,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (BeingSpilled) continue;
// LI may have been separated, create new intervals.
- LI->RenumberValues(LIS);
+ LI->RenumberValues();
ConnectedVNInfoEqClasses ConEQ(LIS);
unsigned NumComp = ConEQ.Classify(LI);
if (NumComp <= 1)
@@ -370,7 +376,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
SmallVector<LiveInterval*, 8> Dups(1, LI);
for (unsigned i = 1; i != NumComp; ++i) {
- Dups.push_back(&createFrom(LI->reg));
+ Dups.push_back(&createEmptyIntervalFrom(LI->reg));
// If LI is an original interval that hasn't been split yet, make the new
// intervals their own originals instead of referring to LI. The original
// interval must contain all the split products, and LI doesn't.
@@ -387,16 +393,27 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
}
}
+// Keep track of new virtual registers created via
+// MachineRegisterInfo::createVirtualRegister.
+void LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) {
+ if (VRM)
+ VRM->grow();
+
+ NewRegs.push_back(VReg);
+}
+
void
LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
const MachineLoopInfo &Loops,
const MachineBlockFrequencyInfo &MBFI) {
VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI);
- for (iterator I = begin(), E = end(); I != E; ++I) {
- LiveInterval &LI = **I;
+ for (unsigned I = 0, Size = size(); I < Size; ++I) {
+ LiveInterval &LI = LIS.getInterval(get(I));
if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
<< MRI.getRegClass(LI.reg)->getName() << '\n');
- VRAI.CalculateWeightAndHint(LI);
+ VRAI.calculateSpillWeightAndHint(LI);
}
}
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 0ef069f..1d801ac 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -119,9 +119,11 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
if (VirtReg.empty())
return false;
CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes()))
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ const LiveRange &UnitRange = LIS->getRegUnit(*Units);
+ if (VirtReg.overlaps(UnitRange, CP, *LIS->getSlotIndexes()))
return true;
+ }
return false;
}
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
new file mode 100644
index 0000000..6221ca2
--- /dev/null
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -0,0 +1,111 @@
+//===-- LiveRegUnits.cpp - Register Unit Set ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRegUnits utility for tracking liveness of
+// physical register units across machine instructions in forward or backward
+// order.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+using namespace llvm;
+
+/// Return true if the given MachineOperand clobbers the given register unit.
+/// A register unit is only clobbered if all its super-registers are clobbered.
+static bool operClobbersUnit(const MachineOperand *MO, unsigned Unit,
+ const MCRegisterInfo *MCRI) {
+ for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) {
+ for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) {
+ if (!MO->clobbersPhysReg(*SI))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// We assume the high bits of a physical super register are not preserved
+/// unless the instruction has an implicit-use operand reading the
+/// super-register or a register unit for the upper bits is available.
+void LiveRegUnits::removeRegsInMask(const MachineOperand &Op,
+ const MCRegisterInfo &MCRI) {
+ SparseSet<unsigned>::iterator LUI = LiveUnits.begin();
+ while (LUI != LiveUnits.end()) {
+ if (operClobbersUnit(&Op, *LUI, &MCRI))
+ LUI = LiveUnits.erase(LUI);
+ else
+ ++LUI;
+ }
+}
+
+void LiveRegUnits::stepBackward(const MachineInstr &MI,
+ const MCRegisterInfo &MCRI) {
+ // Remove defined registers and regmask kills from the set.
+ for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ if (!O->isDef())
+ continue;
+ unsigned Reg = O->getReg();
+ if (Reg == 0)
+ continue;
+ removeReg(Reg, MCRI);
+ } else if (O->isRegMask()) {
+ removeRegsInMask(*O, MCRI);
+ }
+ }
+ // Add uses to the set.
+ for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
+ if (!O->isReg() || !O->readsReg() || O->isUndef())
+ continue;
+ unsigned Reg = O->getReg();
+ if (Reg == 0)
+ continue;
+ addReg(Reg, MCRI);
+ }
+}
+
+/// Uses with a kill flag get removed from the set and defs are added. If
+/// possible, use stepBackward() instead of this function because some kill
+/// flags may be missing.
+void LiveRegUnits::stepForward(const MachineInstr &MI,
+ const MCRegisterInfo &MCRI) {
+ SmallVector<unsigned, 4> Defs;
+ // Remove killed registers from the set.
+ for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ unsigned Reg = O->getReg();
+ if (Reg == 0)
+ continue;
+ if (O->isDef()) {
+ if (!O->isDead())
+ Defs.push_back(Reg);
+ } else {
+ if (!O->isKill())
+ continue;
+ assert(O->isUse());
+ removeReg(Reg, MCRI);
+ }
+ } else if (O->isRegMask()) {
+ removeRegsInMask(*O, MCRI);
+ }
+ }
+ // Add defs to the set.
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ addReg(Defs[i], MCRI);
+ }
+}
+
+/// Adds all registers in the live-in list of block @p MBB.
+void LiveRegUnits::addLiveIns(const MachineBasicBlock *MBB,
+ const MCRegisterInfo &MCRI) {
+ for (MachineBasicBlock::livein_iterator L = MBB->livein_begin(),
+ LE = MBB->livein_end(); L != LE; ++L) {
+ addReg(*L, MCRI);
+ }
+}
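
The file above only defines the update primitives; as a rough illustration of how they compose (a sketch, assuming a LiveRegUnits object that has already been sized for the target's register units, which this file does not show), a forward scan over a block could look like:

#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// Hedged sketch, not part of the commit: seed with the block's live-ins and
// walk forward, letting stepForward() drop kills and add defs.
static void scanBlockForward(LiveRegUnits &LiveUnits,
                             const MachineBasicBlock &MBB,
                             const MCRegisterInfo &MCRI) {
  LiveUnits.addLiveIns(&MBB, MCRI);
  for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    LiveUnits.stepForward(*I, MCRI);
    // ... query LiveUnits here to decide whether a physreg is free ...
  }
}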
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 5633271..ca71e3b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -861,7 +861,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
LiveInterval &LI = LIS->getInterval(Reg);
VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
assert(VNI && "PHI sources should be live out of their predecessors.");
- LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
}
}
}
@@ -880,9 +880,9 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (isLiveOut && isLastMBB) {
VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
assert(VNI && "LiveInterval should have VNInfo where it is live.");
- LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
} else if (!isLiveOut && !isLastMBB) {
- LI.removeRange(StartIndex, EndIndex);
+ LI.removeSegment(StartIndex, EndIndex);
}
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 06bb80a..295b450 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -647,12 +647,15 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
}
}
+#ifndef NDEBUG
+ bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata;
// OpNo now points at the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
// RegMask operands go between the explicit and implicit operands.
assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
- OpNo < MCID->getNumOperands()) &&
+ OpNo < MCID->getNumOperands() || isMetaDataOp) &&
"Trying to add an operand to a machine instr that is already done!");
+#endif
MachineRegisterInfo *MRI = getRegInfo();
@@ -1702,31 +1705,31 @@ void MachineInstr::clearRegisterKills(unsigned Reg,
}
}
-bool MachineInstr::addRegisterDead(unsigned IncomingReg,
+bool MachineInstr::addRegisterDead(unsigned Reg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
- bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg);
bool hasAliases = isPhysReg &&
- MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
+ MCRegAliasIterator(Reg, RegInfo, false).isValid();
bool Found = false;
SmallVector<unsigned,4> DeadOps;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
continue;
- if (Reg == IncomingReg) {
+ if (MOReg == Reg) {
MO.setIsDead();
Found = true;
} else if (hasAliases && MO.isDead() &&
- TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ TargetRegisterInfo::isPhysicalRegister(MOReg)) {
// There exists a super-register that's marked dead.
- if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ if (RegInfo->isSuperRegister(Reg, MOReg))
return true;
- if (RegInfo->isSubRegister(IncomingReg, Reg))
+ if (RegInfo->isSubRegister(Reg, MOReg))
DeadOps.push_back(i);
}
}
@@ -1746,7 +1749,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
if (Found || !AddIfNotFound)
return Found;
- addOperand(MachineOperand::CreateReg(IncomingReg,
+ addOperand(MachineOperand::CreateReg(Reg,
true /*IsDef*/,
true /*IsImp*/,
false /*IsKill*/,
@@ -1754,21 +1757,21 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
return true;
}
-void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+void MachineInstr::addRegisterDefined(unsigned Reg,
const TargetRegisterInfo *RegInfo) {
- if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) {
- MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo);
if (MO)
return;
} else {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
- if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() &&
+ if (MO.isReg() && MO.getReg() == Reg && MO.isDef() &&
MO.getSubReg() == 0)
return;
}
}
- addOperand(MachineOperand::CreateReg(IncomingReg,
+ addOperand(MachineOperand::CreateReg(Reg,
true /*IsDef*/,
true /*IsImp*/));
}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 6ad4e39..104eacd 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -468,12 +468,12 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
if (PhysRegDefs.test(*AS))
PhysRegClobbers.set(*AS);
- if (PhysRegClobbers.test(*AS))
- // MI defined register is seen defined by another instruction in
- // the loop, it cannot be a LICM candidate.
- RuledOut = true;
PhysRegDefs.set(*AS);
}
+ if (PhysRegClobbers.test(Reg))
+ // MI defined register is seen defined by another instruction in
+ // the loop, it cannot be a LICM candidate.
+ RuledOut = true;
}
// Only consider reloads for now and remats which do not have register
@@ -502,7 +502,7 @@ void MachineLICM::HoistRegionPostRA() {
// Walk the entire region, count number of defs for each register, and
// collect potential LICM candidates.
- const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *BB = Blocks[i];
@@ -584,7 +584,7 @@ void MachineLICM::HoistRegionPostRA() {
/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
/// loop, and make sure it is not killed by any instructions in the loop.
void MachineLICM::AddToLiveIns(unsigned Reg) {
- const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+ const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *BB = Blocks[i];
if (!BB->isLiveIn(Reg))
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 7f2c0ca..f8b8796 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -19,8 +19,11 @@
using namespace llvm;
+// Pin the vtable to this file.
+void MachineRegisterInfo::Delegate::anchor() {}
+
MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM)
- : TM(TM), IsSSA(true), TracksLiveness(true) {
+ : TM(TM), TheDelegate(0), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
@@ -108,6 +111,8 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
VRegInfo.grow(Reg);
VRegInfo[Reg].first = RegClass;
RegAllocHints.grow(Reg);
+ if (TheDelegate)
+ TheDelegate->MRI_NoteNewVirtualRegister(Reg);
return Reg;
}
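
This hunk, together with LiveRangeEdit::MRI_NoteNewVirtualRegister above, wires up a simple delegate: MachineRegisterInfo notifies an optional observer each time a virtual register is created, and LiveRangeEdit uses that to collect its NewRegs. A minimal standalone sketch of the pattern (hypothetical names, not LLVM's API):

#include <cstdio>
#include <vector>

// Hypothetical stand-in for MachineRegisterInfo::Delegate.
struct RegInfoDelegate {
  virtual ~RegInfoDelegate() {}
  virtual void noteNewVirtualRegister(unsigned VReg) = 0;
};

class RegInfo {
  RegInfoDelegate *TheDelegate;
  unsigned NextVReg;
public:
  RegInfo() : TheDelegate(0), NextVReg(0) {}
  void setDelegate(RegInfoDelegate *D) { TheDelegate = D; }
  unsigned createVirtualRegister() {
    unsigned Reg = NextVReg++;
    if (TheDelegate)                  // the observer is optional
      TheDelegate->noteNewVirtualRegister(Reg);
    return Reg;
  }
};

// Plays the role of LiveRangeEdit: it just records every new register.
struct NewRegCollector : RegInfoDelegate {
  std::vector<unsigned> NewRegs;
  virtual void noteNewVirtualRegister(unsigned VReg) { NewRegs.push_back(VReg); }
};

int main() {
  RegInfo MRI;
  NewRegCollector Edit;
  MRI.setDelegate(&Edit);
  MRI.createVirtualRegister();
  MRI.createVirtualRegister();
  std::printf("collected %u new vregs\n", (unsigned)Edit.NewRegs.size()); // 2
  return 0;
}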
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index a6c5a9f..e71c4df 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -53,6 +53,12 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
+static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
+ cl::desc("Enable register pressure scheduling."), cl::init(true));
+
+static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
+ cl::desc("Enable cyclic critical path analysis."), cl::init(true));
+
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
cl::desc("Enable load clustering."), cl::init(true));
@@ -66,6 +72,10 @@ static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
+// Pin the vtables to this file.
+void MachineSchedStrategy::anchor() {}
+void ScheduleDAGMutation::anchor() {}
+
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
@@ -95,6 +105,9 @@ public:
virtual void print(raw_ostream &O, const Module* = 0) const;
static char ID; // Class identification, replacement for typeinfo
+
+protected:
+ ScheduleDAGInstrs *createMachineScheduler();
};
} // namespace
@@ -149,12 +162,13 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C);
/// Decrement this iterator until reaching the top or a non-debug instr.
-static MachineBasicBlock::iterator
-priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
+static MachineBasicBlock::const_iterator
+priorNonDebug(MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator Beg) {
assert(I != Beg && "reached the top of the region, cannot decrement");
while (--I != Beg) {
if (!I->isDebugValue())
@@ -163,10 +177,19 @@ priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
return I;
}
+/// Non-const version.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I,
+ MachineBasicBlock::const_iterator Beg) {
+ return const_cast<MachineInstr*>(
+ &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
+}
+
/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
-static MachineBasicBlock::iterator
-nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
+static MachineBasicBlock::const_iterator
+nextIfDebug(MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator End) {
for(; I != End; ++I) {
if (!I->isDebugValue())
break;
@@ -174,6 +197,34 @@ nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
return I;
}
+/// Non-const version.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I,
+ MachineBasicBlock::const_iterator End) {
+ // Cast the return value to nonconst MachineInstr, then cast to an
+ // instr_iterator, which does not check for null, finally return a
+ // bundle_iterator.
+ return MachineBasicBlock::instr_iterator(
+ const_cast<MachineInstr*>(
+ &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
+}
+
+/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
+ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor != useDefaultMachineSched)
+ return Ctor(this);
+
+ // Get the default scheduler set by the target for this function.
+ ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
+ if (Scheduler)
+ return Scheduler;
+
+ // Default to GenericScheduler.
+ return createGenericSched(this);
+}
+
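Strategies other than the built-in ones reach createMachineScheduler() through MachineSchedRegistry, the same mechanism used for the "converge" entry at the bottom of this file. A hedged sketch of registering one (MySchedStrategy and the option name are made up; only the registry pattern comes from this file):

// Hypothetical registration; would select this DAG builder via -misched=my-sched.
static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new MySchedStrategy(C));
}
static MachineSchedRegistry
MySchedRegistry("my-sched", "Illustrative custom scheduler.", createMySched);
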
/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
@@ -209,18 +260,9 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
RegClassInfo->runOnMachineFunction(*MF);
- // Select the scheduler, or set the default.
- MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
- if (Ctor == useDefaultMachineSched) {
- // Get the default scheduler set by the target.
- Ctor = MachineSchedRegistry::getDefault();
- if (!Ctor) {
- Ctor = createConvergingSched;
- MachineSchedRegistry::setDefault(Ctor);
- }
- }
- // Instantiate the selected scheduler.
- OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+ // Instantiate the selected scheduler for this target, function, and
+ // optimization level.
+ OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
// Visit all machine basic blocks.
//
@@ -255,14 +297,15 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
+ unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingInstrs) {
+ for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
break;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
- Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
+ Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
// Skip empty scheduling regions (0 or 1 schedulable instructions).
if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
@@ -277,7 +320,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
<< "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
- dbgs() << " Remaining: " << RemainingInstrs << "\n");
+ dbgs() << " RegionInstrs: " << NumRegionInstrs
+ << " Remaining: " << RemainingInstrs << "\n");
// Schedule a region: possibly reorder instructions.
// This invalidates 'RegionEnd' and 'I'.
@@ -446,13 +490,19 @@ bool ScheduleDAGMI::checkSchedLimit() {
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
- unsigned endcount)
+ unsigned regioninstrs)
{
- ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
// For convenience remember the end of the liveness region.
LiveRegionEnd =
(RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+
+ SUPressureDiffs.clear();
+
+ SchedImpl->initPolicy(begin, end, regioninstrs);
+
+ ShouldTrackPressure = SchedImpl->shouldTrackPressure();
}
// Setup the register pressure trackers for the top scheduled top and bottom
@@ -483,9 +533,16 @@ void ScheduleDAGMI::initRegPressure() {
dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
};
+ // For each live out vreg reduce the pressure change associated with other
+ // uses of the same vreg below the live-out reaching def.
+ updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
+
// Account for liveness generated by the region boundary.
- if (LiveRegionEnd != RegionEnd)
- BotRPTracker.recede();
+ if (LiveRegionEnd != RegionEnd) {
+ SmallVector<unsigned, 8> LiveUses;
+ BotRPTracker.recede(&LiveUses);
+ updatePressureDiffs(LiveUses);
+ }
assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
@@ -500,34 +557,83 @@ void ScheduleDAGMI::initRegPressure() {
DEBUG(dbgs() << TRI->getRegPressureSetName(i)
<< " Limit " << Limit
<< " Actual " << RegionPressure[i] << "\n");
- RegionCriticalPSets.push_back(PressureElement(i, 0));
+ RegionCriticalPSets.push_back(PressureChange(i));
}
}
DEBUG(dbgs() << "Excess PSets: ";
for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
dbgs() << TRI->getRegPressureSetName(
- RegionCriticalPSets[i].PSetID) << " ";
+ RegionCriticalPSets[i].getPSet()) << " ";
dbgs() << "\n");
}
-// FIXME: When the pressure tracker deals in pressure differences then we won't
-// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
-updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
- for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
- unsigned ID = RegionCriticalPSets[i].PSetID;
- int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
- if ((int)NewMaxPressure[ID] > MaxUnits)
- MaxUnits = NewMaxPressure[ID];
+updateScheduledPressure(const SUnit *SU,
+ const std::vector<unsigned> &NewMaxPressure) {
+ const PressureDiff &PDiff = getPressureDiff(SU);
+ unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
+ for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
+ I != E; ++I) {
+ if (!I->isValid())
+ break;
+ unsigned ID = I->getPSet();
+ while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
+ ++CritIdx;
+ if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
+ if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
+ && NewMaxPressure[ID] <= INT16_MAX)
+ RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
+ }
+ unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
+ if (NewMaxPressure[ID] >= Limit - 2) {
+ DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
+ << NewMaxPressure[ID] << " > " << Limit << "(+ "
+ << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
+ }
}
- DEBUG(
- for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
- unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
- if (NewMaxPressure[i] > Limit ) {
- dbgs() << " " << TRI->getRegPressureSetName(i) << ": "
- << NewMaxPressure[i] << " > " << Limit << "\n";
+}
+
+/// Update the PressureDiff array for liveness after scheduling this
+/// instruction.
+void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
+ for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
+ /// FIXME: Currently assuming single-use physregs.
+ unsigned Reg = LiveUses[LUIdx];
+ DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+
+ // This may be called before CurrentBottom has been initialized. However,
+ // BotRPTracker must have a valid position. We want the value live into the
+ // instruction or live out of the block, so ask for the previous
+ // instruction's live-out.
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI;
+ MachineBasicBlock::const_iterator I =
+ nextIfDebug(BotRPTracker.getPos(), BB->end());
+ if (I == BB->end())
+ VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ else {
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
+ VNI = LRQ.valueIn();
+ }
+ // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
+ assert(VNI && "No live value at use.");
+ for (VReg2UseMap::iterator
+ UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
+ SUnit *SU = UI->SU;
+ DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
+ // If this use comes before the reaching def, it cannot be a last use, so
+ // decrease its pressure change.
+ if (!SU->isScheduled && SU != &ExitSU) {
+ LiveQueryResult LRQ
+ = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
+ if (LRQ.valueIn() == VNI)
+ getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
}
- });
+ }
+ }
}
/// schedule - Called back from MachineScheduler::runOnMachineFunction
@@ -585,6 +691,13 @@ void ScheduleDAGMI::schedule() {
/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
+ if (!ShouldTrackPressure) {
+ RPTracker.reset();
+ RegionCriticalPSets.clear();
+ buildSchedGraph(AA);
+ return;
+ }
+
// Initialize the register pressure tracker used by buildSchedGraph.
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
/*TrackUntiedDefs=*/true);
@@ -594,7 +707,7 @@ void ScheduleDAGMI::buildDAGWithRegPressure() {
RPTracker.recede();
// Build the DAG, and compute current register pressure.
- buildSchedGraph(AA, &RPTracker);
+ buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
// Initialize top/bottom trackers after computing region pressure.
initRegPressure();
@@ -637,6 +750,91 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
ExitSU.biasCriticalPath();
}
+/// Compute the max cyclic critical path through the DAG. The scheduling DAG
+/// only provides the critical path for single block loops. To handle loops that
+/// span blocks, we could use the vreg path latencies provided by
+/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
+/// available for use in the scheduler.
+///
+/// The cyclic path estimation identifies a def-use pair that crosses the back
+/// edge and considers the depth and height of the nodes. For example, consider
+/// the following instruction sequence where each instruction has unit latency
+/// and defines an eponymous virtual register:
+///
+/// a->b(a,c)->c(b)->d(c)->exit
+///
+/// The cyclic critical path is two cycles: b->c->b
+/// The acyclic critical path is four cycles: a->b->c->d->exit
+/// LiveOutHeight = height(c) = len(c->d->exit) = 2
+/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
+/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
+/// LiveInDepth = depth(b) = len(a->b) = 1
+///
+/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
+/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
+/// CyclicCriticalPath = min(2, 2) = 2
+unsigned ScheduleDAGMI::computeCyclicCriticalPath() {
+ // This only applies to single-block loops.
+ if (!BB->isSuccessor(BB))
+ return 0;
+
+ unsigned MaxCyclicLatency = 0;
+ // Visit each live out vreg def to find def/use pairs that cross iterations.
+ ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
+ for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
+ RI != RE; ++RI) {
+ unsigned Reg = *RI;
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ if (!DefVNI)
+ continue;
+
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
+ const SUnit *DefSU = getSUnit(DefMI);
+ if (!DefSU)
+ continue;
+
+ unsigned LiveOutHeight = DefSU->getHeight();
+ unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
+ // Visit all local users of the vreg def.
+ for (VReg2UseMap::iterator
+ UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
+ if (UI->SU == &ExitSU)
+ continue;
+
+ // Only consider uses of the phi.
+ LiveQueryResult LRQ =
+ LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
+ if (!LRQ.valueIn()->isPHIDef())
+ continue;
+
+ // Assume that a path spanning two iterations is a cycle, which could
+ // overestimate in strange cases. This allows cyclic latency to be
+ // estimated as the minimum slack of the vreg's depth or height.
+ unsigned CyclicLatency = 0;
+ if (LiveOutDepth > UI->SU->getDepth())
+ CyclicLatency = LiveOutDepth - UI->SU->getDepth();
+
+ unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
+ if (LiveInHeight > LiveOutHeight) {
+ if (LiveInHeight - LiveOutHeight < CyclicLatency)
+ CyclicLatency = LiveInHeight - LiveOutHeight;
+ }
+ else
+ CyclicLatency = 0;
+
+ DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
+ << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
+ if (CyclicLatency > MaxCyclicLatency)
+ MaxCyclicLatency = CyclicLatency;
+ }
+ }
+ DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
+ return MaxCyclicLatency;
+}
+
/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
ArrayRef<SUnit*> BotRoots) {
@@ -664,11 +862,13 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
SchedImpl->registerRoots();
// Advance past initial DebugValues.
- assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
- TopRPTracker.setPos(CurrentTop);
-
CurrentBottom = RegionEnd;
+
+ if (ShouldTrackPressure) {
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ TopRPTracker.setPos(CurrentTop);
+ }
}
/// Move an instruction and update register pressure.
@@ -685,10 +885,12 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
TopRPTracker.setPos(MI);
}
- // Update top scheduled pressure.
- TopRPTracker.advance();
- assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
- updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+ if (ShouldTrackPressure) {
+ // Update top scheduled pressure.
+ TopRPTracker.advance();
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
+ }
}
else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
@@ -704,10 +906,14 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
moveInstruction(MI, CurrentBottom);
CurrentBottom = MI;
}
- // Update bottom scheduled pressure.
- BotRPTracker.recede();
- assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
- updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+ if (ShouldTrackPressure) {
+ // Update bottom scheduled pressure.
+ SmallVector<unsigned, 8> LiveUses;
+ BotRPTracker.recede(&LiveUses);
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
+ updatePressureDiffs(LiveUses);
+ }
}
}
@@ -1113,13 +1319,13 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) {
}
//===----------------------------------------------------------------------===//
-// ConvergingScheduler - Implementation of the generic MachineSchedStrategy.
+// GenericScheduler - Implementation of the generic MachineSchedStrategy.
//===----------------------------------------------------------------------===//
namespace {
-/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
-class ConvergingScheduler : public MachineSchedStrategy {
+class GenericScheduler : public MachineSchedStrategy {
public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
@@ -1129,7 +1335,7 @@ public:
TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
#ifndef NDEBUG
- static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
+ static const char *getReasonStr(GenericScheduler::CandReason Reason);
#endif
/// Policy for scheduling the next instruction in the candidate's zone.
@@ -1160,7 +1366,7 @@ public:
}
};
- /// Store the state used by ConvergingScheduler heuristics, required for the
+ /// Store the state used by GenericScheduler heuristics, required for the
/// lifetime of one invocation of pickNode().
struct SchedCandidate {
CandPolicy Policy;
@@ -1205,16 +1411,21 @@ public:
struct SchedRemainder {
// Critical path through the DAG in expected latency.
unsigned CriticalPath;
+ unsigned CyclicCritPath;
// Scaled count of micro-ops left to schedule.
unsigned RemIssueCount;
+ bool IsAcyclicLatencyLimited;
+
// Unscheduled resources
SmallVector<unsigned, 16> RemainingCounts;
void reset() {
CriticalPath = 0;
+ CyclicCritPath = 0;
RemIssueCount = 0;
+ IsAcyclicLatencyLimited = false;
RemainingCounts.clear();
}
@@ -1288,13 +1499,16 @@ public:
void reset() {
// A new HazardRec is created for each DAG and owned by SchedBoundary.
- delete HazardRec;
-
+ // Destroying and reconstructing it is very expensive though. So keep
+ // invalid, placeholder HazardRecs.
+ if (HazardRec && HazardRec->isEnabled()) {
+ delete HazardRec;
+ HazardRec = 0;
+ }
Available.clear();
Pending.clear();
CheckPending = false;
NextSUs.clear();
- HazardRec = 0;
CurrCycle = 0;
CurrMOps = 0;
MinReadyCycle = UINT_MAX;
@@ -1316,7 +1530,7 @@ public:
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
- Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+ Pending(ID << GenericScheduler::LogMaxQID, Name+".P"),
HazardRec(0) {
reset();
}
@@ -1327,7 +1541,7 @@ public:
SchedRemainder *rem);
bool isTop() const {
- return Available.getID() == ConvergingScheduler::TopQID;
+ return Available.getID() == GenericScheduler::TopQID;
}
#ifndef NDEBUG
@@ -1399,6 +1613,7 @@ public:
};
private:
+ const MachineSchedContext *Context;
ScheduleDAGMI *DAG;
const TargetSchedModel *SchedModel;
const TargetRegisterInfo *TRI;
@@ -1408,6 +1623,7 @@ private:
SchedBoundary Top;
SchedBoundary Bot;
+ MachineSchedPolicy RegionPolicy;
public:
/// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
enum {
@@ -1416,8 +1632,15 @@ public:
LogMaxQID = 2
};
- ConvergingScheduler():
- DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+ GenericScheduler(const MachineSchedContext *C):
+ Context(C), DAG(0), SchedModel(0), TRI(0),
+ Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+ virtual void initPolicy(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs);
+
+ bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; }
virtual void initialize(ScheduleDAGMI *dag);
@@ -1432,6 +1655,8 @@ public:
virtual void registerRoots();
protected:
+ void checkAcyclicLatency();
+
void tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary &Zone,
@@ -1452,7 +1677,7 @@ protected:
};
} // namespace
-void ConvergingScheduler::SchedRemainder::
+void GenericScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
reset();
if (!SchedModel->hasInstrSchedModel())
@@ -1473,7 +1698,7 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
}
}
-void ConvergingScheduler::SchedBoundary::
+void GenericScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
reset();
DAG = dag;
@@ -1483,7 +1708,49 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
}
-void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
+/// Initialize the per-region scheduling policy.
+void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) {
+ const TargetMachine &TM = Context->MF->getTarget();
+
+ // Avoid setting up the register pressure tracker for small regions to save
+ // compile time. As a rough heuristic, only track pressure when the number of
+ // schedulable instructions exceeds half the integer register file.
+ unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
+ TM.getTargetLowering()->getRegClassFor(MVT::i32));
+
+ RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
+
+ // For generic targets, we default to bottom-up, because it's simpler and more
+ // compile-time optimizations have been implemented in that direction.
+ RegionPolicy.OnlyBottomUp = true;
+
+ // Allow the subtarget to override default policy.
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
+
+ // After subtarget overrides, apply command line options.
+ if (!EnableRegPressure)
+ RegionPolicy.ShouldTrackPressure = false;
+
+ // Check whether -misched-topdown/bottomup can force or unforce the scheduling direction.
+ // e.g. -misched-bottomup=false allows scheduling in both directions.
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ if (ForceBottomUp.getNumOccurrences() > 0) {
+ RegionPolicy.OnlyBottomUp = ForceBottomUp;
+ if (RegionPolicy.OnlyBottomUp)
+ RegionPolicy.OnlyTopDown = false;
+ }
+ if (ForceTopDown.getNumOccurrences() > 0) {
+ RegionPolicy.OnlyTopDown = ForceTopDown;
+ if (RegionPolicy.OnlyTopDown)
+ RegionPolicy.OnlyBottomUp = false;
+ }
+}
+
+void GenericScheduler::initialize(ScheduleDAGMI *dag) {
DAG = dag;
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
@@ -1498,14 +1765,17 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
const TargetMachine &TM = DAG->MF.getTarget();
- Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
- Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
-
- assert((!ForceTopDown || !ForceBottomUp) &&
- "-misched-topdown incompatible with -misched-bottomup");
+ if (!Top.HazardRec) {
+ Top.HazardRec =
+ TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ }
+ if (!Bot.HazardRec) {
+ Bot.HazardRec =
+ TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ }
}
-void ConvergingScheduler::releaseTopNode(SUnit *SU) {
+void GenericScheduler::releaseTopNode(SUnit *SU) {
if (SU->isScheduled)
return;
@@ -1524,7 +1794,7 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) {
Top.releaseNode(SU, SU->TopReadyCycle);
}
-void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
+void GenericScheduler::releaseBottomNode(SUnit *SU) {
if (SU->isScheduled)
return;
@@ -1545,8 +1815,46 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
Bot.releaseNode(SU, SU->BotReadyCycle);
}
-void ConvergingScheduler::registerRoots() {
+/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
+/// critical path by more cycles than it takes to drain the instruction buffer.
+/// We estimate an upper bound on in-flight instructions as:
+///
+/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
+/// InFlightIterations = AcyclicPath / CyclesPerIteration
+/// InFlightResources = InFlightIterations * LoopResources
+///
+/// TODO: Check execution resources in addition to IssueCount.
+void GenericScheduler::checkAcyclicLatency() {
+ if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
+ return;
+
+ // Scaled number of cycles per loop iteration.
+ unsigned IterCount =
+ std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
+ Rem.RemIssueCount);
+ // Scaled acyclic critical path.
+ unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
+ // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
+ unsigned InFlightCount =
+ (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
+ unsigned BufferLimit =
+ SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
+
+ Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
+
+ DEBUG(dbgs() << "IssueCycles="
+ << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
+ << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
+ << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
+ << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
+ << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
+ if (Rem.IsAcyclicLatencyLimited)
+ dbgs() << " ACYCLIC LATENCY LIMIT\n");
+}
+
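To make the formulas above concrete, the following throwaway snippet replays the arithmetic in checkAcyclicLatency() with made-up numbers (none of them come from a real schedule model):

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical inputs, chosen only to exercise the estimate.
  unsigned CyclicCritPath = 2, CriticalPath = 12;  // cycles
  unsigned LatencyFactor = 1, MicroOpFactor = 1;   // schedule-model scaling
  unsigned RemIssueCount = 6;                      // scaled micro-ops remaining
  unsigned MicroOpBufferSize = 4;

  unsigned IterCount =
      std::max(CyclicCritPath * LatencyFactor, RemIssueCount);               // 6
  unsigned AcyclicCount = CriticalPath * LatencyFactor;                      // 12
  unsigned InFlightCount =
      (AcyclicCount * RemIssueCount + IterCount - 1) / IterCount;            // 12
  unsigned BufferLimit = MicroOpBufferSize * MicroOpFactor;                  // 4
  std::printf("InFlight=%u BufferLim=%u limited=%d\n", InFlightCount,
              BufferLimit, InFlightCount > BufferLimit);                     // limited=1
  return 0;
}
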
+void GenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
+
// Some roots may not feed into ExitSU. Check all of them in case.
for (std::vector<SUnit*>::const_iterator
I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
@@ -1554,6 +1862,11 @@ void ConvergingScheduler::registerRoots() {
Rem.CriticalPath = (*I)->getDepth();
}
DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+
+ if (EnableCyclicPath) {
+ Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
+ checkAcyclicLatency();
+ }
}
/// Does this SU have a hazard within the current instruction group.
@@ -1569,7 +1882,7 @@ void ConvergingScheduler::registerRoots() {
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
-bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
if (HazardRec->isEnabled())
return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
@@ -1583,7 +1896,7 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
}
// Find the unscheduled node in ReadySUs with the highest latency.
-unsigned ConvergingScheduler::SchedBoundary::
+unsigned GenericScheduler::SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
SUnit *LateSU = 0;
unsigned RemLatency = 0;
@@ -1605,7 +1918,7 @@ findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
// Count resources in this zone and the remaining unscheduled
// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
// resource index, or zero if the zone is issue limited.
-unsigned ConvergingScheduler::SchedBoundary::
+unsigned GenericScheduler::SchedBoundary::
getOtherResourceCount(unsigned &OtherCritIdx) {
OtherCritIdx = 0;
if (!SchedModel->hasInstrSchedModel())
@@ -1633,7 +1946,7 @@ getOtherResourceCount(unsigned &OtherCritIdx) {
/// Set the CandPolicy for this zone given the current resources and latencies
/// inside and outside the zone.
-void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
+void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
SchedBoundary &OtherZone) {
// Now that potential stalls have been considered, apply preemptive heuristics
// based on the total latency and resources inside and outside this
@@ -1692,7 +2005,7 @@ void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
Policy.DemandResIdx = OtherCritIdx;
}
-void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
+void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
unsigned ReadyCycle) {
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
@@ -1710,7 +2023,7 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
}
/// Move the boundary of scheduled code by one cycle.
-void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
+void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
if (SchedModel->getMicroOpBufferSize() == 0) {
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
if (MinReadyCycle > NextCycle)
@@ -1748,7 +2061,7 @@ void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
}
-void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
+void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
unsigned Count) {
ExecutedResCounts[PIdx] += Count;
if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
@@ -1762,7 +2075,7 @@ void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
///
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
-unsigned ConvergingScheduler::SchedBoundary::
+unsigned GenericScheduler::SchedBoundary::
countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
unsigned Count = Factor * Cycles;
@@ -1787,7 +2100,7 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
}
/// Move the boundary of scheduled code by one SUnit.
-void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
// Update the reservation table.
if (HazardRec->isEnabled()) {
if (!isTop() && SU->isCall) {
@@ -1891,7 +2204,7 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
-void ConvergingScheduler::SchedBoundary::releasePending() {
+void GenericScheduler::SchedBoundary::releasePending() {
// If the available queue is empty, it is safe to reset MinReadyCycle.
if (Available.empty())
MinReadyCycle = UINT_MAX;
@@ -1921,7 +2234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
}
/// Remove SU from the ready set for this boundary.
-void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) {
if (Available.isInQueue(SU))
Available.remove(Available.find(SU));
else {
@@ -1933,7 +2246,7 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
-SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() {
if (CheckPending)
releasePending();
@@ -1962,7 +2275,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
#ifndef NDEBUG
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
-void ConvergingScheduler::SchedBoundary::dumpScheduledState() {
+void GenericScheduler::SchedBoundary::dumpScheduledState() {
unsigned ResFactor;
unsigned ResCount;
if (ZoneCritResIdx) {
@@ -1985,7 +2298,7 @@ void ConvergingScheduler::SchedBoundary::dumpScheduledState() {
}
#endif
-void ConvergingScheduler::SchedCandidate::
+void GenericScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
const TargetSchedModel *SchedModel) {
if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
@@ -2005,9 +2318,9 @@ initResourceDelta(const ScheduleDAGMI *DAG,
/// Return true if this heuristic determines order.
static bool tryLess(int TryVal, int CandVal,
- ConvergingScheduler::SchedCandidate &TryCand,
- ConvergingScheduler::SchedCandidate &Cand,
- ConvergingScheduler::CandReason Reason) {
+ GenericScheduler::SchedCandidate &TryCand,
+ GenericScheduler::SchedCandidate &Cand,
+ GenericScheduler::CandReason Reason) {
if (TryVal < CandVal) {
TryCand.Reason = Reason;
return true;
@@ -2022,9 +2335,9 @@ static bool tryLess(int TryVal, int CandVal,
}
static bool tryGreater(int TryVal, int CandVal,
- ConvergingScheduler::SchedCandidate &TryCand,
- ConvergingScheduler::SchedCandidate &Cand,
- ConvergingScheduler::CandReason Reason) {
+ GenericScheduler::SchedCandidate &TryCand,
+ GenericScheduler::SchedCandidate &Cand,
+ GenericScheduler::CandReason Reason) {
if (TryVal > CandVal) {
TryCand.Reason = Reason;
return true;
@@ -2038,26 +2351,26 @@ static bool tryGreater(int TryVal, int CandVal,
return false;
}
-static bool tryPressure(const PressureElement &TryP,
- const PressureElement &CandP,
- ConvergingScheduler::SchedCandidate &TryCand,
- ConvergingScheduler::SchedCandidate &Cand,
- ConvergingScheduler::CandReason Reason) {
+static bool tryPressure(const PressureChange &TryP,
+ const PressureChange &CandP,
+ GenericScheduler::SchedCandidate &TryCand,
+ GenericScheduler::SchedCandidate &Cand,
+ GenericScheduler::CandReason Reason) {
+ int TryRank = TryP.getPSetOrMax();
+ int CandRank = CandP.getPSetOrMax();
// If both candidates affect the same set, go with the smallest increase.
- if (TryP.PSetID == CandP.PSetID) {
- return tryLess(TryP.UnitIncrease, CandP.UnitIncrease, TryCand, Cand,
+ if (TryRank == CandRank) {
+ return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
Reason);
}
// If one candidate decreases and the other increases, go with it.
- if (tryLess(TryP.UnitIncrease < 0, CandP.UnitIncrease < 0, TryCand, Cand,
+ // Invalid candidates have UnitInc==0.
+ if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
Reason)) {
return true;
}
- // If TryP has lower Rank, it has a higher priority.
- int TryRank = TryP.PSetRank();
- int CandRank = CandP.PSetRank();
// If the candidates are decreasing pressure, reverse priority.
- if (TryP.UnitIncrease < 0)
+ if (TryP.getUnitInc() < 0)
std::swap(TryRank, CandRank);
return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
}
@@ -2094,6 +2407,32 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
return 0;
}
+static bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
+ GenericScheduler::SchedCandidate &Cand,
+ GenericScheduler::SchedBoundary &Zone) {
+ if (Zone.isTop()) {
+ if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, GenericScheduler::TopDepthReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, GenericScheduler::TopPathReduce))
+ return true;
+ }
+ else {
+ if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, GenericScheduler::BotHeightReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, GenericScheduler::BotPathReduce))
+ return true;
+ }
+ return false;
+}
+
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
@@ -2105,16 +2444,44 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
-void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
RegPressureTracker &TempTracker) {
- // Always initialize TryCand's RPDelta.
- TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
- DAG->getRegionCriticalPSets(),
- DAG->getRegPressure().MaxSetPressure);
+ if (DAG->isTrackingPressure()) {
+ // Always initialize TryCand's RPDelta.
+ if (Zone.isTop()) {
+ TempTracker.getMaxDownwardPressureDelta(
+ TryCand.SU->getInstr(),
+ TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ }
+ else {
+ if (VerifyScheduling) {
+ TempTracker.getMaxUpwardPressureDelta(
+ TryCand.SU->getInstr(),
+ &DAG->getPressureDiff(TryCand.SU),
+ TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ }
+ else {
+ RPTracker.getUpwardPressureDelta(
+ TryCand.SU->getInstr(),
+ DAG->getPressureDiff(TryCand.SU),
+ TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ }
+ }
+ }
+ DEBUG(if (TryCand.RPDelta.Excess.isValid())
+ dbgs() << " SU(" << TryCand.SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
+ << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
// Initialize the candidate if needed.
if (!Cand.isValid()) {
@@ -2129,13 +2496,22 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
// Avoid exceeding the target's limit. If signed PSetID is negative, it is
// invalid; convert it to INT_MAX to give it lowest priority.
- if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
- RegExcess))
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
+ Cand.RPDelta.Excess,
+ TryCand, Cand, RegExcess))
return;
// Avoid increasing the max critical pressure in the scheduled region.
- if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
- TryCand, Cand, RegCritical))
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
+ Cand.RPDelta.CriticalMax,
+ TryCand, Cand, RegCritical))
+ return;
+
+ // For loops that are acyclic path limited, aggressively schedule for latency.
+ // This can result in very long dependence chains scheduled in sequence, so
+ // once every cycle (when CurrMOps == 0), switch to normal heuristics.
+ if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps
+ && tryLatency(TryCand, Cand, Zone))
return;
// Keep clustered nodes together to encourage downstream peephole
@@ -2157,8 +2533,9 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
return;
}
// Avoid increasing the max pressure of the entire region.
- if (tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax,
- TryCand, Cand, RegMax))
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
+ Cand.RPDelta.CurrentMax,
+ TryCand, Cand, RegMax))
return;
// Avoid critical resource consumption and balance the schedule.
@@ -2172,27 +2549,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
return;
// Avoid serializing long latency dependence chains.
- if (Cand.Policy.ReduceLatency) {
- if (Zone.isTop()) {
- if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
- if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
- TryCand, Cand, TopDepthReduce))
- return;
- }
- if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
- TryCand, Cand, TopPathReduce))
- return;
- }
- else {
- if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
- if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
- TryCand, Cand, BotHeightReduce))
- return;
- }
- if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
- TryCand, Cand, BotPathReduce))
- return;
- }
+ // For acyclic path limited loops, latency was already checked above.
+ if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
+ && tryLatency(TryCand, Cand, Zone)) {
+ return;
}
// Prefer immediate defs/users of the last scheduled instruction. This is a
@@ -2210,8 +2570,8 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
}
#ifndef NDEBUG
-const char *ConvergingScheduler::getReasonStr(
- ConvergingScheduler::CandReason Reason) {
+const char *GenericScheduler::getReasonStr(
+ GenericScheduler::CandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND ";
case PhysRegCopy: return "PREG-COPY";
@@ -2232,8 +2592,8 @@ const char *ConvergingScheduler::getReasonStr(
llvm_unreachable("Unknown reason!");
}
-void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
- PressureElement P;
+void GenericScheduler::traceCandidate(const SchedCandidate &Cand) {
+ PressureChange P;
unsigned ResIdx = 0;
unsigned Latency = 0;
switch (Cand.Reason) {
@@ -2269,8 +2629,8 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
}
dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
if (P.isValid())
- dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
- << ":" << P.UnitIncrease << " ";
+ dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
+ << ":" << P.getUnitInc() << " ";
else
dbgs() << " ";
if (ResIdx)
@@ -2285,12 +2645,12 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
}
#endif
-/// Pick the best candidate from the top queue.
+/// Pick the best candidate from the queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
-void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand) {
ReadyQueue &Q = Zone.Available;
@@ -2315,14 +2675,14 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
}
}
-static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
+static void tracePick(const GenericScheduler::SchedCandidate &Cand,
bool IsTop) {
DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
- << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
+ << GenericScheduler::getReasonStr(Cand.Reason) << '\n');
}
/// Pick the best candidate node from either the top or bottom queue.
-SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
+SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
@@ -2377,7 +2737,7 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
}
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
-SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
+SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
if (DAG->top() == DAG->bottom()) {
assert(Top.Available.empty() && Top.Pending.empty() &&
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
@@ -2385,24 +2745,26 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
}
SUnit *SU;
do {
- if (ForceTopDown) {
+ if (RegionPolicy.OnlyTopDown) {
SU = Top.pickOnlyChoice();
if (!SU) {
CandPolicy NoPolicy;
SchedCandidate TopCand(NoPolicy);
pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
- assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ assert(TopCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(TopCand, true);
SU = TopCand.SU;
}
IsTopNode = true;
}
- else if (ForceBottomUp) {
+ else if (RegionPolicy.OnlyBottomUp) {
SU = Bot.pickOnlyChoice();
if (!SU) {
CandPolicy NoPolicy;
SchedCandidate BotCand(NoPolicy);
pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
- assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ assert(BotCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(BotCand, false);
SU = BotCand.SU;
}
IsTopNode = false;
@@ -2421,7 +2783,7 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
return SU;
}
-void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
@@ -2452,7 +2814,7 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
/// them here. See comments in biasPhysRegCopy.
-void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
Top.bumpNode(SU);
@@ -2469,25 +2831,23 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
- assert((!ForceTopDown || !ForceBottomUp) &&
- "-misched-topdown incompatible with -misched-bottomup");
- ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
+static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) {
+ ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
- if (EnableLoadCluster)
+ if (EnableLoadCluster && DAG->TII->enableClusterLoads())
DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
if (EnableMacroFusion)
DAG->addMutation(new MacroFusion(DAG->TII));
return DAG;
}
static MachineSchedRegistry
-ConvergingSchedRegistry("converge", "Standard converging scheduler.",
- createConvergingSched);
+GenericSchedRegistry("converge", "Standard converging scheduler.",
+ createGenericSched);
//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
@@ -2529,15 +2889,6 @@ struct ILPOrder {
/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
- /// In case all subtrees are eventually connected to a common root through
- /// data dependence (e.g. reduction), place an upper limit on their size.
- ///
- /// FIXME: A subtree limit is generally good, but in the situation commented
- /// above, where multiple similar subtrees feed a common root, we should
- /// only split at a point where the resulting subtrees will be balanced.
- /// (a motivating test case must be found).
- static const unsigned SubtreeLimit = 16;
-
ScheduleDAGMI *DAG;
ILPOrder Cmp;
@@ -2721,7 +3072,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
}
static bool isNodeHidden(const SUnit *Node) {
- return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
}
static bool hasNodeAddressLabel(const SUnit *Node,
@@ -2744,7 +3095,11 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
std::string Str;
raw_string_ostream SS(Str);
- SS << "SU(" << SU->NodeNum << ')';
+ const SchedDFSResult *DFS =
+ static_cast<const ScheduleDAGMI*>(G)->getDFSResult();
+ SS << "SU:" << SU->NodeNum;
+ if (DFS)
+ SS << " I:" << DFS->getNumInstrs(SU);
return SS.str();
}
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index dacdbdd..105d7c2 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -308,12 +308,29 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
// to be sunk then it's probably worth it.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
+ if (!MO.isReg() || !MO.isUse())
+ continue;
unsigned Reg = MO.getReg();
- if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Reg == 0)
continue;
- if (MRI->hasOneNonDBGUse(Reg))
- return true;
+
+ // We don't move live definitions of physical registers,
+ // so sinking their uses won't enable any opportunities.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ // If this instruction is the only user of a virtual register,
+ // check if breaking the edge will enable sinking
+ // both this instruction and the defining instruction.
+ if (MRI->hasOneNonDBGUse(Reg)) {
+ // If the definition resides in same MBB,
+ // claim it's likely we can sink these together.
+ // If definition resides elsewhere, we aren't
+ // blocking it from being sunk so don't break the edge.
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (DefMI->getParent() == MI->getParent())
+ return true;
+ }
}
return false;
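As a rough standalone model of the heuristic described in the new comments above (the Operand type, worthBreakingEdge, and SingleUse are invented for this sketch and are not the LLVM API): breaking the critical edge is only claimed worthwhile when a single-use virtual register operand is defined in the same block as the candidate instruction.

// Toy model of the isWorthBreakingCriticalEdge register scan (illustrative only).
#include <vector>

struct Operand {
  bool IsReg, IsUse, IsPhysical;
  unsigned Reg;      // 0 means "no register"
  unsigned DefBlock; // block number of the defining instruction
};

// UseBlock is the block containing the candidate instruction; SingleUse(Reg)
// stands in for MRI->hasOneNonDBGUse(Reg).
bool worthBreakingEdge(const std::vector<Operand> &Ops, unsigned UseBlock,
                       bool (*SingleUse)(unsigned)) {
  for (const Operand &MO : Ops) {
    if (!MO.IsReg || !MO.IsUse || MO.Reg == 0)
      continue;
    if (MO.IsPhysical)
      continue;                           // physreg defs are never moved
    if (SingleUse(MO.Reg) && MO.DefBlock == UseBlock)
      return true;                        // def and use could sink together
  }
  return false;
}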
@@ -615,9 +632,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
- // If the block has multiple predecessors, this would introduce computation on
- // a path that it doesn't already exist. We could split the critical edge,
- // but for now we just punt.
+ // If the block has multiple predecessors, this is a critical edge.
+ // Decide if we can sink along it or need to break the edge.
if (SuccToSinkTo->pred_size() > 1) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index e74bfc8..d61470c 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -213,6 +213,10 @@ namespace {
const LiveInterval &LI);
void report(const char *msg, const MachineBasicBlock *MBB,
const LiveInterval &LI);
+ void report(const char *msg, const MachineFunction *MF,
+ const LiveRange &LR);
+ void report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveRange &LR);
void verifyInlineAsm(const MachineInstr *MI);
@@ -225,9 +229,10 @@ namespace {
void verifyLiveVariables();
void verifyLiveIntervals();
void verifyLiveInterval(const LiveInterval&);
- void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
- void verifyLiveIntervalSegment(const LiveInterval&,
- LiveInterval::const_iterator);
+ void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned);
+ void verifyLiveRangeSegment(const LiveRange&,
+ const LiveRange::const_iterator I, unsigned);
+ void verifyLiveRange(const LiveRange&, unsigned);
void verifyStackFrame();
};
@@ -414,23 +419,25 @@ void MachineVerifier::report(const char *msg,
void MachineVerifier::report(const char *msg, const MachineFunction *MF,
const LiveInterval &LI) {
report(msg, MF);
- *OS << "- interval: ";
- if (TargetRegisterInfo::isVirtualRegister(LI.reg))
- *OS << PrintReg(LI.reg, TRI);
- else
- *OS << PrintRegUnit(LI.reg, TRI);
- *OS << ' ' << LI << '\n';
+ *OS << "- interval: " << LI << '\n';
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
const LiveInterval &LI) {
report(msg, MBB);
- *OS << "- interval: ";
- if (TargetRegisterInfo::isVirtualRegister(LI.reg))
- *OS << PrintReg(LI.reg, TRI);
- else
- *OS << PrintRegUnit(LI.reg, TRI);
- *OS << ' ' << LI << '\n';
+ *OS << "- interval: " << LI << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveRange &LR) {
+ report(msg, MBB);
+ *OS << "- liverange: " << LR << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF,
+ const LiveRange &LR) {
+ report(msg, MF);
+ *OS << "- liverange: " << LR << "\n";
}
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
@@ -768,7 +775,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if (MI->getNumOperands() < MCID.getNumOperands()) {
report("Too few operands", MI);
*OS << MCID.getNumOperands() << " operands expected, but "
- << MI->getNumExplicitOperands() << " given.\n";
+ << MI->getNumOperands() << " given.\n";
}
// Check the tied operands.
@@ -826,7 +833,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MO->isReg() &&
!(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
if (MO->isDef() && !MCOI.isOptionalDef())
- report("Explicit operand marked as def", MO, MONum);
+ report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
report("Explicit operand marked as implicit", MO, MONum);
}
@@ -1001,16 +1008,16 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Check the cached regunit intervals.
if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
- LiveRangeQuery LRQ(*LI, UseIdx);
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) {
+ LiveQueryResult LRQ = LR->Query(UseIdx);
if (!LRQ.valueIn()) {
- report("No live range at use", MO, MONum);
+ report("No live segment at use", MO, MONum);
*OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
- << ' ' << *LI << '\n';
+ << ' ' << *LR << '\n';
}
if (MO->isKill() && !LRQ.isKill()) {
report("Live range continues after kill flag", MO, MONum);
- *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+ *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n';
}
}
}
@@ -1020,9 +1027,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveInts->hasInterval(Reg)) {
// This is a virtual register interval.
const LiveInterval &LI = LiveInts->getInterval(Reg);
- LiveRangeQuery LRQ(LI, UseIdx);
+ LiveQueryResult LRQ = LI.Query(UseIdx);
if (!LRQ.valueIn()) {
- report("No live range at use", MO, MONum);
+ report("No live segment at use", MO, MONum);
*OS << UseIdx << " is not live in " << LI << '\n';
}
// Check for extra kill flags.
@@ -1071,7 +1078,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
report("Multiple virtual register defs in SSA form", MO, MONum);
- // Check LiveInts for a live range, but only for virtual registers.
+ // Check LiveInts for a live segment, but only for virtual registers.
if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
!LiveInts->isNotInMIMap(MI)) {
SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
@@ -1086,9 +1093,17 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
<< DefIdx << " in " << LI << '\n';
}
} else {
- report("No live range at def", MO, MONum);
+ report("No live segment at def", MO, MONum);
*OS << DefIdx << " is not live in " << LI << '\n';
}
+ // Check that, if the dead def flag is present, LiveInts agree.
+ if (MO->isDead()) {
+ LiveQueryResult LRQ = LI.Query(DefIdx);
+ if (!LRQ.isDeadDef()) {
+ report("Live range continues after dead def flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ }
} else {
report("Virtual register has no Live interval", MO, MONum);
}
@@ -1335,25 +1350,26 @@ void MachineVerifier::verifyLiveIntervals() {
// Verify all the cached regunit intervals.
for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
- if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i))
- verifyLiveInterval(*LI);
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(i))
+ verifyLiveRange(*LR, i);
}
-void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
- VNInfo *VNI) {
+void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
+ const VNInfo *VNI,
+ unsigned Reg) {
if (VNI->isUnused())
return;
- const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+ const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
if (!DefVNI) {
- report("Valno not live at def and not marked unused", MF, LI);
+ report("Valno not live at def and not marked unused", MF, LR);
*OS << "Valno #" << VNI->id << '\n';
return;
}
if (DefVNI != VNI) {
- report("Live range at def has different valno", MF, LI);
+ report("Live segment at def has different valno", MF, LR);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " where valno #" << DefVNI->id << " is live\n";
return;
@@ -1361,15 +1377,15 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
if (!MBB) {
- report("Invalid definition index", MF, LI);
+ report("Invalid definition index", MF, LR);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
+ << " in " << LR << '\n';
return;
}
if (VNI->isPHIDef()) {
if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MBB, LI);
+ report("PHIDef value is not defined at MBB start", MBB, LR);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< ", not at the beginning of BB#" << MBB->getNumber() << '\n';
}
@@ -1379,161 +1395,154 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
// Non-PHI def.
const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
if (!MI) {
- report("No instruction at def index", MBB, LI);
+ report("No instruction at def index", MBB, LR);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
return;
}
- bool hasDef = false;
- bool isEarlyClobber = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
- continue;
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- if (MOI->getReg() != LI.reg)
- continue;
- } else {
- if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
- !TRI->hasRegUnit(MOI->getReg(), LI.reg))
+ if (Reg != 0) {
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (MOI->getReg() != Reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->hasRegUnit(MOI->getReg(), Reg))
+ continue;
+ }
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
}
- hasDef = true;
- if (MOI->isEarlyClobber())
- isEarlyClobber = true;
- }
- if (!hasDef) {
- report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LR << '\n';
+ }
- // Early clobber defs begin at USE slots, but other defs must begin at
- // DEF slots.
- if (isEarlyClobber) {
- if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MBB, LI);
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB, LR);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MBB, LR);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
- } else if (!VNI->def.isRegister()) {
- report("Non-PHI, non-early clobber def must be at a register slot",
- MBB, LI);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
}
-void
-MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
- LiveInterval::const_iterator I) {
- const VNInfo *VNI = I->valno;
- assert(VNI && "Live range has no valno");
-
- if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
- report("Foreign valno in live range", MF, LI);
- *OS << *I << " has a bad valno\n";
+void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
+ const LiveRange::const_iterator I,
+ unsigned Reg) {
+ const LiveRange::Segment &S = *I;
+ const VNInfo *VNI = S.valno;
+ assert(VNI && "Live segment has no valno");
+
+ if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live segment", MF, LR);
+ *OS << S << " has a bad valno\n";
}
if (VNI->isUnused()) {
- report("Live range valno is marked unused", MF, LI);
- *OS << *I << '\n';
+ report("Live segment valno is marked unused", MF, LR);
+ *OS << S << '\n';
}
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
if (!MBB) {
- report("Bad start of live segment, no basic block", MF, LI);
- *OS << *I << '\n';
+ report("Bad start of live segment, no basic block", MF, LR);
+ *OS << S << '\n';
return;
}
SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
- if (I->start != MBBStartIdx && I->start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB, LI);
- *OS << *I << '\n';
+ if (S.start != MBBStartIdx && S.start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB, LR);
+ *OS << S << '\n';
}
const MachineBasicBlock *EndMBB =
- LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ LiveInts->getMBBFromIndex(S.end.getPrevSlot());
if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF, LI);
- *OS << *I << '\n';
+ report("Bad end of live segment, no basic block", MF, LR);
+ *OS << S << '\n';
return;
}
// No more checks for live-out segments.
- if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ if (S.end == LiveInts->getMBBEndIdx(EndMBB))
return;
// RegUnit intervals are allowed dead phis.
- if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() &&
- I->start == VNI->def && I->end == VNI->def.getDeadSlot())
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() &&
+ S.start == VNI->def && S.end == VNI->def.getDeadSlot())
return;
// The live segment is ending inside EndMBB
const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB, LI);
- *OS << *I << '\n';
+ report("Live segment doesn't end at a valid instruction", EndMBB, LR);
+ *OS << S << '\n';
return;
}
// The block slot must refer to a basic block boundary.
- if (I->end.isBlock()) {
- report("Live segment ends at B slot of an instruction", EndMBB, LI);
- *OS << *I << '\n';
+ if (S.end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB, LR);
+ *OS << S << '\n';
}
- if (I->end.isDead()) {
+ if (S.end.isDead()) {
// Segment ends on the dead slot.
// That means there must be a dead def.
- if (!SlotIndex::isSameInstr(I->start, I->end)) {
- report("Live segment ending at dead slot spans instructions", EndMBB, LI);
- *OS << *I << '\n';
+ if (!SlotIndex::isSameInstr(S.start, S.end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB, LR);
+ *OS << S << '\n';
}
}
// A live segment can only end at an early-clobber slot if it is being
// redefined by an early-clobber def.
- if (I->end.isEarlyClobber()) {
- if (I+1 == LI.end() || (I+1)->start != I->end) {
+ if (S.end.isEarlyClobber()) {
+ if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", EndMBB, LI);
- *OS << *I << '\n';
+ "redefined by an EC def in the same instruction", EndMBB, LR);
+ *OS << S << '\n';
}
}
// The following checks only apply to virtual registers. Physreg liveness
// is too weird to check.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- // A live range can end with either a redefinition, a kill flag on a
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // A live segment can end with either a redefinition, a kill flag on a
// use, or a dead flag on a def.
bool hasRead = false;
- bool hasDeadDef = false;
for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || MOI->getReg() != LI.reg)
+ if (!MOI->isReg() || MOI->getReg() != Reg)
continue;
if (MOI->readsReg())
hasRead = true;
- if (MOI->isDef() && MOI->isDead())
- hasDeadDef = true;
}
-
- if (I->end.isDead()) {
- if (!hasDeadDef) {
- report("Instruction doesn't have a dead def operand", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- } else {
+ if (!S.end.isDead()) {
if (!hasRead) {
- report("Instruction ending live range doesn't read the register", MI);
- *OS << *I << " in " << LI << '\n';
+ report("Instruction ending live segment doesn't read the register", MI);
+ *OS << S << " in " << LR << '\n';
}
}
}
// Now check all the basic blocks in this live segment.
MachineFunction::const_iterator MFI = MBB;
- // Is this live range the beginning of a non-PHIDef VN?
- if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Is this live segment the beginning of a non-PHIDef VN?
+ if (S.start == VNI->def && !VNI->isPHIDef()) {
// Not live-in to any blocks.
if (MBB == EndMBB)
return;
@@ -1541,9 +1550,9 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
++MFI;
}
for (;;) {
- assert(LiveInts->isLiveInToMBB(LI, MFI));
+ assert(LiveInts->isLiveInToMBB(LR, MFI));
// We don't know how to track physregs into a landing pad.
- if (!TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
MFI->isLandingPad()) {
if (&*MFI == EndMBB)
break;
@@ -1559,11 +1568,11 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
PE = MFI->pred_end(); PI != PE; ++PI) {
SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
- const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
+ const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
// All predecessors must have a live-out value.
if (!PVNI) {
- report("Register not marked live out of predecessor", *PI, LI);
+ report("Register not marked live out of predecessor", *PI, LR);
*OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
<< '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
<< PEnd << '\n';
@@ -1572,7 +1581,7 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
// Only PHI-defs can take different predecessor values.
if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI, LI);
+ report("Different value live out of predecessor", *PI, LR);
*OS << "Valno #" << PVNI->id << " live out of BB#"
<< (*PI)->getNumber() << '@' << PEnd
<< "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
@@ -1585,13 +1594,17 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
}
}
-void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
- I!=E; ++I)
- verifyLiveIntervalValue(LI, *I);
+void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg) {
+ for (LiveRange::const_vni_iterator I = LR.vni_begin(), E = LR.vni_end();
+ I != E; ++I)
+ verifyLiveRangeValue(LR, *I, Reg);
+
+ for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I)
+ verifyLiveRangeSegment(LR, I, Reg);
+}
- for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
- verifyLiveIntervalSegment(LI, I);
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ verifyLiveRange(LI, LI.reg);
// Check the LI only has one connected component.
if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index bf23eca..dcd9072 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -313,14 +313,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (IncomingReg) {
// Add the region from the beginning of MBB to the copy instruction to
// IncomingReg's live interval.
- LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg);
+ LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg);
VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
if (!IncomingVNI)
IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
LIS->getVNInfoAllocator());
- IncomingLI.addRange(LiveRange(MBBStartIndex,
- DestCopyIndex.getRegSlot(),
- IncomingVNI));
+ IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ IncomingVNI));
}
LiveInterval &DestLI = LIS->getInterval(DestReg);
@@ -332,14 +332,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// the copy instruction.
VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
assert(OrigDestVNI && "PHI destination should be live at block entry.");
- DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot());
+ DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot());
DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
LIS->getVNInfoAllocator());
DestLI.removeValNo(OrigDestVNI);
} else {
// Otherwise, remove the region from the beginning of MBB to the copy
// instruction from DestReg's live interval.
- DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot());
+ DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot());
VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
assert(DestVNI && "PHI destination should be live at its definition.");
DestVNI->def = DestCopyIndex.getRegSlot();
@@ -460,7 +460,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (LIS) {
if (NewSrcInstr) {
LIS->InsertMachineInstrInMaps(NewSrcInstr);
- LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr);
+ LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr);
}
if (!SrcUndef &&
@@ -511,8 +511,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
"Cannot find kill instruction");
SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst);
- SrcLI.removeRange(LastUseIndex.getRegSlot(),
- LIS->getMBBEndIdx(&opBlock));
+ SrcLI.removeSegment(LastUseIndex.getRegSlot(),
+ LIS->getMBBEndIdx(&opBlock));
}
}
}
diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h
index 9ac47fb..48234ae 100644
--- a/lib/CodeGen/PHIEliminationUtils.h
+++ b/lib/CodeGen/PHIEliminationUtils.h
@@ -1,4 +1,4 @@
-//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index c0861c5..f4ffd03 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -58,8 +58,6 @@ OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
static cl::opt<cl::boolOrDefault>
EnableMachineSched("enable-misched", cl::Hidden,
cl::desc("Enable the machine instruction scheduling pass."));
-static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden,
- cl::desc("Use strong PHI elimination."));
static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
cl::Hidden,
cl::desc("Disable Machine LICM"));
@@ -236,7 +234,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
// Temporarily disable experimental passes.
const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
- if (!ST.enableMachineScheduler())
+ if (!ST.useMachineScheduler())
disablePass(&MachineSchedulerID);
}
@@ -675,24 +673,15 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// preferably fix the scavenger to not depend on them).
addPass(&LiveVariablesID);
- // Add passes that move from transformed SSA into conventional SSA. This is a
- // "copy coalescing" problem.
- //
- if (!EnableStrongPHIElim) {
- // Edge splitting is smarter with machine loop info.
- addPass(&MachineLoopInfoID);
- addPass(&PHIEliminationID);
- }
+ // Edge splitting is smarter with machine loop info.
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
// Eventually, we want to run LiveIntervals before PHI elimination.
if (EarlyLiveIntervals)
addPass(&LiveIntervalsID);
addPass(&TwoAddressInstructionPassID);
-
- if (EnableStrongPHIElim)
- addPass(&StrongPHIEliminationID);
-
addPass(&RegisterCoalescerID);
// PreRA instruction scheduling.
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index a7439b5..28f2d2f 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -40,20 +40,30 @@
// If the branch instruction can use flag from "sub", then we can replace
// "sub" with "subs" and eliminate the "cmp" instruction.
//
-// - Optimize Bitcast pairs:
-//
-// v1 = bitcast v0
-// v2 = bitcast v1
-// = v2
-// =>
-// v1 = bitcast v0
-// = v0
-//
// - Optimize Loads:
//
// Loads that can be folded into a later instruction. A load is foldable
// if it loads to virtual registers and the virtual register defined has
// a single use.
+//
+// - Optimize Copies and Bitcast:
+//
+// Rewrite copies and bitcasts to avoid cross register bank copies
+// when possible.
+// E.g., consider the following example, where capital and lowercase
+// letters denote different register files:
+// b = copy A <-- cross-bank copy
+// C = copy b <-- cross-bank copy
+// =>
+// b = copy A <-- cross-bank copy
+// C = copy A <-- same-bank copy
+//
+// E.g., for bitcast:
+// b = bitcast A <-- cross-bank copy
+// C = bitcast b <-- cross-bank copy
+// =>
+// b = bitcast A <-- cross-bank copy
+// C = copy A <-- same-bank copy
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "peephole-opt"
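A rough standalone model of the chain-following idea in the header comment above (the Bank and VReg types and findSameBankSource are invented for this sketch; the real pass works on MachineInstr copies and register classes):

// Toy model of the cross-register-bank copy rewrite (illustrative only).
#include <map>
#include <optional>

enum class Bank { GPR, FPR };
struct VReg { unsigned Id; Bank RegBank; };

// Maps a vreg id to the vreg it was copied or bitcast from.
using DefChain = std::map<unsigned, VReg>;

// Walk up the copy chain from Src and return the first source whose bank
// matches DefBank, if any -- that value is what the rewritten COPY should read.
std::optional<VReg> findSameBankSource(Bank DefBank, VReg Src,
                                       const DefChain &Defs) {
  for (;;) {
    if (Src.RegBank == DefBank)
      return Src;                 // same-bank source found; rewrite is useful
    auto It = Defs.find(Src.Id);
    if (It == Defs.end())
      return std::nullopt;        // reached the top of the chain
    Src = It->second;             // keep following copies/bitcasts
  }
}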
@@ -81,11 +91,11 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
cl::desc("Disable the peephole optimizer"));
STATISTIC(NumReuse, "Number of extension results reused");
-STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
+STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -112,11 +122,11 @@ namespace {
}
private:
- bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
bool optimizeSelect(MachineInstr *MI);
+ bool optimizeCopyOrBitcast(MachineInstr *MI);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
@@ -298,78 +308,6 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
return Changed;
}
-/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
-/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
-/// a value cross register classes), and the source is defined by another
-/// bitcast instruction B. And if the register class of source of B matches
-/// the register class of instruction A, then it is legal to replace all uses
-/// of the def of A with source of B. e.g.
-/// %vreg0<def> = VMOVSR %vreg1
-/// %vreg3<def> = VMOVRS %vreg0
-/// Replace all uses of vreg3 with vreg1.
-
-bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
- MachineBasicBlock *MBB) {
- unsigned NumDefs = MI->getDesc().getNumDefs();
- unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
- if (NumDefs != 1)
- return false;
-
- unsigned Def = 0;
- unsigned Src = 0;
- for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- if (MO.isDef())
- Def = Reg;
- else if (Src)
- // Multiple sources?
- return false;
- else
- Src = Reg;
- }
-
- assert(Def && Src && "Malformed bitcast instruction!");
-
- MachineInstr *DefMI = MRI->getVRegDef(Src);
- if (!DefMI || !DefMI->isBitcast())
- return false;
-
- unsigned SrcSrc = 0;
- NumDefs = DefMI->getDesc().getNumDefs();
- NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
- if (NumDefs != 1)
- return false;
- for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
- const MachineOperand &MO = DefMI->getOperand(i);
- if (!MO.isReg() || MO.isDef())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- if (!MO.isDef()) {
- if (SrcSrc)
- // Multiple sources?
- return false;
- else
- SrcSrc = Reg;
- }
- }
-
- if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
- return false;
-
- MRI->replaceRegWith(Def, SrcSrc);
- MRI->clearKillFlags(SrcSrc);
- MI->eraseFromParent();
- ++NumBitcasts;
- return true;
-}
-
/// optimizeCmpInstr - If the instruction is a compare and the previous
/// instruction it's comparing against already sets (or could be modified to
/// set) the same flag as the compare, then we can remove the comparison and use
@@ -411,6 +349,150 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
return true;
}
+/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
+/// share the same register file.
+static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) {
+ // Same register class.
+ if (DefRC == SrcRC)
+ return true;
+
+ // Both operands are sub registers. Check if they share a register class.
+ unsigned SrcIdx, DefIdx;
+ if (SrcSubReg && DefSubReg)
+ return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
+ SrcIdx, DefIdx) != NULL;
+ // At most one of the register is a sub register, make it Src to avoid
+ // duplicating the test.
+ if (!SrcSubReg) {
+ std::swap(DefSubReg, SrcSubReg);
+ std::swap(DefRC, SrcRC);
+ }
+
+ // One of the registers is a sub register; check if we can get a superclass.
+ if (SrcSubReg)
+ return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != NULL;
+ // Plain copy.
+ return TRI.getCommonSubClass(DefRC, SrcRC) != NULL;
+}
+
+/// \brief Get the index of the definition and source for the \p Copy
+/// instruction.
+/// \pre Copy.isCopy() or Copy.isBitcast().
+/// \return True if the Copy instruction has only one register source
+/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx
+/// are invalid.
+static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy,
+ unsigned &DefIdx, unsigned &SrcIdx) {
+ assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type.");
+ if (Copy.isCopy()) {
+ // Copy instructions are supposed to be: Def = Src.
+ if (Copy.getDesc().getNumOperands() != 2)
+ return false;
+ DefIdx = 0;
+ SrcIdx = 1;
+ assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!");
+ return true;
+ }
+ // Bitcast case.
+ // Bitcasts with more than one def are not supported.
+ if (Copy.getDesc().getNumDefs() != 1)
+ return false;
+ // Initialize SrcIdx to an undefined operand.
+ SrcIdx = Copy.getDesc().getNumOperands();
+ for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) {
+ const MachineOperand &MO = Copy.getOperand(OpIdx);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef())
+ DefIdx = OpIdx;
+ else if (SrcIdx != EndOpIdx)
+ // Multiple sources?
+ return false;
+ SrcIdx = OpIdx;
+ }
+ return true;
+}
+
+/// \brief Optimize a copy or bitcast instruction to avoid cross
+/// register bank copies. The optimization looks through a chain of
+/// copies and tries to find a source that has a compatible register
+/// class.
+/// Two register classes are considered to be compatible if they share
+/// the same register bank.
+/// New copies issued by this optimization are register allocator
+/// friendly. This optimization does not remove any copy as it may
+/// overconstrain the register allocator, but replaces some when
+/// possible.
+/// \pre \p MI is a Copy or Bitcast (MI->isCopy() or MI->isBitcast() is true).
+/// \return True when \p MI has been optimized. In that case, \p MI has
+/// been removed from its parent.
+bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
+ unsigned DefIdx, SrcIdx;
+ if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx))
+ return false;
+
+ const MachineOperand &MODef = MI->getOperand(DefIdx);
+ assert(MODef.isReg() && "Copies must be between registers.");
+ unsigned Def = MODef.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Def))
+ return false;
+
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Def);
+ unsigned DefSubReg = MODef.getSubReg();
+
+ unsigned Src;
+ unsigned SrcSubReg;
+ bool ShouldRewrite = false;
+ MachineInstr *Copy = MI;
+ const TargetRegisterInfo &TRI = *TM->getRegisterInfo();
+
+ // Follow the chain of copies until we reach the top or find a
+ // more suitable source.
+ do {
+ unsigned CopyDefIdx, CopySrcIdx;
+ if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx))
+ break;
+ const MachineOperand &MO = Copy->getOperand(CopySrcIdx);
+ assert(MO.isReg() && "Copies must be between registers.");
+ Src = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Src))
+ break;
+
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
+ SrcSubReg = MO.getSubReg();
+
+ // If this source does not incur a cross register bank copy, use it.
+ ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
+ SrcSubReg);
+ // Follow the chain of copies: get the definition of Src.
+ Copy = MRI->getVRegDef(Src);
+ } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast()));
+
+ // If we did not find a more suitable source, there is nothing to optimize.
+ if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
+ return false;
+
+ // Rewrite the copy to avoid a cross register bank penalty.
+ unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def :
+ MRI->createVirtualRegister(DefRC);
+ MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(Src, 0, SrcSubReg);
+ NewCopy->getOperand(0).setSubReg(DefSubReg);
+
+ MRI->replaceRegWith(Def, NewVR);
+ MRI->clearKillFlags(NewVR);
+ MI->eraseFromParent();
+ ++NumCopiesBitcasts;
+ return true;
+}
+
/// isLoadFoldable - Check whether MI is a candidate for folding into a later
/// instruction. We only fold loads to virtual registers and the virtual
/// register defined has a single use.
@@ -523,7 +605,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->mayStore() || MI->isCall())
FoldAsLoadDefReg = 0;
- if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
+ if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
(MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
(MI->isSelect() && optimizeSelect(MI))) {
// MI is deleted.
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 27f5676..1afc1ec 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -127,6 +127,12 @@ namespace {
/// The schedule. Null SUnit*'s represent noop instructions.
std::vector<SUnit*> Sequence;
+ /// The index in BB of RegionEnd.
+ ///
+ /// This is the instruction number from the top of the current block, not
+ /// the SlotIndex. It is only used by the AntiDepBreaker.
+ unsigned EndIndex;
+
public:
SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
@@ -141,11 +147,14 @@ namespace {
///
void startBlock(MachineBasicBlock *BB);
+ // Set the index of RegionEnd within the current BB.
+ void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
+
/// Initialize the scheduler state for the next scheduling region.
virtual void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
- unsigned endcount);
+ unsigned regioninstrs);
/// Notify that the scheduler has finished scheduling the current region.
virtual void exitRegion();
@@ -197,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList(
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
: ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
- LiveRegs(TRI->getNumRegs())
+ LiveRegs(TRI->getNumRegs()), EndIndex(0)
{
const TargetMachine &TM = MF.getTarget();
const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
@@ -223,8 +232,8 @@ SchedulePostRATDList::~SchedulePostRATDList() {
void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
- unsigned endcount) {
- ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+ unsigned regioninstrs) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
Sequence.clear();
}
@@ -312,20 +321,21 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
unsigned Count = MBB->size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
MachineInstr *MI = llvm::prior(I);
+ --Count;
// Calls are not scheduling boundaries before register allocation, but
// post-ra we don't gain anything by scheduling across calls since we
// don't need to worry about register pressure.
if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
- Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+ Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count);
+ Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
Scheduler.EmitSchedule();
Current = MI;
- CurrentCount = Count - 1;
+ CurrentCount = Count;
Scheduler.Observe(MI, CurrentCount);
}
I = MI;
- --Count;
if (MI->isBundle())
Count -= MI->getBundleSize();
}
@@ -333,6 +343,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
assert((MBB->begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
Scheduler.EmitSchedule();
@@ -504,11 +515,11 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// Examine all used registers and set/clear kill flag. When a
// register is used multiple times we only set the kill flag on
- // the first use.
+ // the first use. Don't set kill flags on undef operands.
killedRegs.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse()) continue;
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index e4e18c3..0c5173a 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -78,7 +78,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
unsigned Reg = MI->getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- // For virtual regiusters, mark all uses as <undef>, and convert users to
+ // For virtual registers, mark all uses as <undef>, and convert users to
// implicit-def when possible.
for (MachineRegisterInfo::use_nodbg_iterator UI =
MRI->use_nodbg_begin(Reg),
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 1965188..b0e494f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -14,9 +14,6 @@
// This pass must be run after register allocation. After this pass is
// executed, it is illegal to construct MO_FrameIndex operands.
//
-// This pass provides an optional shrink wrapping variant of prolog/epilog
-// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp.
-//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pei"
@@ -66,6 +63,38 @@ STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+ return (MBB && !MBB->empty() && MBB->back().isReturn());
+}
+
+/// Compute the entry block and the set of return blocks.
+void PEI::calculateSets(MachineFunction &Fn) {
+ // Callee-saved register info used for spill/restore placement.
+ const std::vector<CalleeSavedInfo> &CSI =
+ Fn.getFrameInfo()->getCalleeSavedInfo();
+
+ // If no CSRs used, we are done.
+ if (CSI.empty())
+ return;
+
+ // Save refs to entry and return blocks.
+ EntryBlock = Fn.begin();
+ for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
+ MBB != E; ++MBB)
+ if (isReturnBlock(MBB))
+ ReturnBlocks.push_back(MBB);
+
+ return;
+}
+
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
@@ -93,16 +122,11 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
calculateCalleeSavedRegisters(Fn);
// Determine placement of CSR spill/restore code:
- // - With shrink wrapping, place spills and restores to tightly
- // enclose regions in the Machine CFG of the function where
- // they are used.
- // - Without shink wrapping (default), place all spills in the
- // entry block, all restores in return blocks.
- placeCSRSpillsAndRestores(Fn);
+ // place all spills in the entry block, all restores in return blocks.
+ calculateSets(Fn);
// Add the code to save and restore the callee saved registers
- if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Naked))
+ if (!F->hasFnAttribute(Attribute::Naked))
insertCSRSpillsAndRestores(Fn);
// Allow the target machine to make final modifications to the function
@@ -117,8 +141,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// called functions. Because of this, calculateCalleeSavedRegisters()
// must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
- if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Naked))
+ if (!F->hasFnAttribute(Attribute::Naked))
insertPrologEpilogCode(Fn);
// Replace all MO_FrameIndex operands with physical register references
@@ -143,7 +166,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
<< ") in " << Fn.getName() << ".\n";
delete RS;
- clearAllSets();
+ ReturnBlocks.clear();
return true;
}
@@ -221,8 +244,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
return;
// In Naked functions we aren't going to save any registers.
- if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Naked))
+ if (F.getFunction()->hasFnAttribute(Attribute::Naked))
return;
std::vector<CalleeSavedInfo> CSI;
@@ -286,7 +308,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
}
/// insertCSRSpillsAndRestores - Insert spill and restore code for
-/// callee saved registers used in the function, handling shrink wrapping.
+/// callee saved registers used in the function.
///
void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Get callee saved register information.
@@ -304,133 +326,33 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
MachineBasicBlock::iterator I;
- if (!ShrinkWrapThisFunction) {
- // Spill using target interface.
- I = EntryBlock->begin();
- if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- // Add the callee-saved register as live-in.
- // It's killed at the spill.
- EntryBlock->addLiveIn(CSI[i].getReg());
-
- // Insert the spill to the stack frame.
- unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
- CSI[i].getFrameIdx(), RC, TRI);
- }
- }
-
- // Restore using target interface.
- for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
- MachineBasicBlock* MBB = ReturnBlocks[ri];
- I = MBB->end(); --I;
-
- // Skip over all terminator instructions, which are part of the return
- // sequence.
- MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->isTerminator())
- I = I2;
-
- bool AtStart = I == MBB->begin();
- MachineBasicBlock::iterator BeforeI = I;
- if (!AtStart)
- --BeforeI;
-
- // Restore all registers immediately before the return and any
- // terminators that precede it.
- if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(*MBB, I, Reg,
- CSI[i].getFrameIdx(),
- RC, TRI);
- assert(I != MBB->begin() &&
- "loadRegFromStackSlot didn't insert any code!");
- // Insert in reverse order. loadRegFromStackSlot can insert
- // multiple instructions.
- if (AtStart)
- I = MBB->begin();
- else {
- I = BeforeI;
- ++I;
- }
- }
- }
- }
- return;
- }
-
- // Insert spills.
- std::vector<CalleeSavedInfo> blockCSI;
- for (CSRegBlockMap::iterator BI = CSRSave.begin(),
- BE = CSRSave.end(); BI != BE; ++BI) {
- MachineBasicBlock* MBB = BI->first;
- CSRegSet save = BI->second;
-
- if (save.empty())
- continue;
-
- blockCSI.clear();
- for (CSRegSet::iterator RI = save.begin(),
- RE = save.end(); RI != RE; ++RI) {
- blockCSI.push_back(CSI[*RI]);
- }
- assert(blockCSI.size() > 0 &&
- "Could not collect callee saved register info");
-
- I = MBB->begin();
-
- // When shrink wrapping, use stack slot stores/loads.
- for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
+ // Spill using target interface.
+ I = EntryBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in.
// It's killed at the spill.
- MBB->addLiveIn(blockCSI[i].getReg());
+ EntryBlock->addLiveIn(CSI[i].getReg());
// Insert the spill to the stack frame.
- unsigned Reg = blockCSI[i].getReg();
+ unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*MBB, I, Reg,
- true,
- blockCSI[i].getFrameIdx(),
+ TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(),
RC, TRI);
}
}
- for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
- BE = CSRRestore.end(); BI != BE; ++BI) {
- MachineBasicBlock* MBB = BI->first;
- CSRegSet restore = BI->second;
-
- if (restore.empty())
- continue;
+ // Restore using target interface.
+ for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
+ MachineBasicBlock *MBB = ReturnBlocks[ri];
+ I = MBB->end();
+ --I;
- blockCSI.clear();
- for (CSRegSet::iterator RI = restore.begin(),
- RE = restore.end(); RI != RE; ++RI) {
- blockCSI.push_back(CSI[*RI]);
- }
- assert(blockCSI.size() > 0 &&
- "Could not find callee saved register info");
-
- // If MBB is empty and needs restores, insert at the _beginning_.
- if (MBB->empty()) {
- I = MBB->begin();
- } else {
- I = MBB->end();
- --I;
-
- // Skip over all terminator instructions, which are part of the
- // return sequence.
- if (! I->isTerminator()) {
- ++I;
- } else {
- MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->isTerminator())
- I = I2;
- }
- }
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
+ I = I2;
bool AtStart = I == MBB->begin();
MachineBasicBlock::iterator BeforeI = I;
@@ -439,21 +361,21 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Restore all registers immediately before the return and any
// terminators that precede it.
- for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
- unsigned Reg = blockCSI[i].getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(*MBB, I, Reg,
- blockCSI[i].getFrameIdx(),
- RC, TRI);
- assert(I != MBB->begin() &&
- "loadRegFromStackSlot didn't insert any code!");
- // Insert in reverse order. loadRegFromStackSlot can insert
- // multiple instructions.
- if (AtStart)
- I = MBB->begin();
- else {
- I = BeforeI;
- ++I;
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
}
}
}
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index 50f4daf..77cfa2b 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -14,9 +14,6 @@
// This pass must be run after register allocation. After this pass is
// executed, it is illegal to construct MO_FrameIndex operands.
//
-// This pass also implements a shrink wrapping variant of prolog/epilog
-// insertion.
-//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_PEI_H
@@ -54,74 +51,16 @@ namespace llvm {
// stack frame indexes.
unsigned MinCSFrameIndex, MaxCSFrameIndex;
- // Analysis info for spill/restore placement.
- // "CSR": "callee saved register".
-
- // CSRegSet contains indices into the Callee Saved Register Info
- // vector built by calculateCalleeSavedRegisters() and accessed
- // via MF.getFrameInfo()->getCalleeSavedInfo().
- typedef SparseBitVector<> CSRegSet;
-
- // CSRegBlockMap maps MachineBasicBlocks to sets of callee
- // saved register indices.
- typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
-
- // Set and maps for computing CSR spill/restore placement:
- // used in function (UsedCSRegs)
- // used in a basic block (CSRUsed)
- // anticipatable in a basic block (Antic{In,Out})
- // available in a basic block (Avail{In,Out})
- // to be spilled at the entry to a basic block (CSRSave)
- // to be restored at the end of a basic block (CSRRestore)
- CSRegSet UsedCSRegs;
- CSRegBlockMap CSRUsed;
- CSRegBlockMap AnticIn, AnticOut;
- CSRegBlockMap AvailIn, AvailOut;
- CSRegBlockMap CSRSave;
- CSRegBlockMap CSRRestore;
-
// Entry and return blocks of the current function.
MachineBasicBlock* EntryBlock;
SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
- // Map of MBBs to top level MachineLoops.
- DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
-
- // Flag to control shrink wrapping per-function:
- // may choose to skip shrink wrapping for certain
- // functions.
- bool ShrinkWrapThisFunction;
-
// Flag to control whether to use the register scavenger to resolve
// frame index materialization registers. Set according to
// TRI->requiresFrameIndexScavenging() for the current function.
bool FrameIndexVirtualScavenging;
-#ifndef NDEBUG
- // Machine function handle.
- MachineFunction* MF;
-
- // Flag indicating that the current function
- // has at least one "short" path in the machine
- // CFG from the entry block to an exit block.
- bool HasFastExitPath;
-#endif
-
- bool calculateSets(MachineFunction &Fn);
- bool calcAnticInOut(MachineBasicBlock* MBB);
- bool calcAvailInOut(MachineBasicBlock* MBB);
- void calculateAnticAvail(MachineFunction &Fn);
- bool addUsesForMEMERegion(MachineBasicBlock* MBB,
- SmallVectorImpl<MachineBasicBlock *> &blks);
- bool addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks);
- bool calcSpillPlacements(MachineBasicBlock* MBB,
- SmallVectorImpl<MachineBasicBlock *> &blks,
- CSRegBlockMap &prevSpills);
- bool calcRestorePlacements(MachineBasicBlock* MBB,
- SmallVectorImpl<MachineBasicBlock *> &blks,
- CSRegBlockMap &prevRestores);
- void placeSpillsAndRestores(MachineFunction &Fn);
- void placeCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateSets(MachineFunction &Fn);
void calculateCallsInformation(MachineFunction &Fn);
void calculateCalleeSavedRegisters(MachineFunction &Fn);
void insertCSRSpillsAndRestores(MachineFunction &Fn);
@@ -132,44 +71,8 @@ namespace llvm {
void scavengeFrameVirtualRegs(MachineFunction &Fn);
void insertPrologEpilogCode(MachineFunction &Fn);
- // Initialize DFA sets, called before iterations.
- void clearAnticAvailSets();
- // Clear all sets constructed by shrink wrapping.
- void clearAllSets();
-
- // Initialize all shrink wrapping data.
- void initShrinkWrappingInfo();
-
- // Convienences for dealing with machine loops.
- MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
- MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
-
- // Propgate CSRs used in MBB to all MBBs of loop LP.
- void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
-
// Convenience for recognizing return blocks.
bool isReturnBlock(MachineBasicBlock* MBB);
-
-#ifndef NDEBUG
- // Debugging methods.
-
- // Mark this function as having fast exit paths.
- void findFastExitPath();
-
- // Verify placement of spills/restores.
- void verifySpillRestorePlacement();
-
- std::string getBasicBlockName(const MachineBasicBlock* MBB);
- std::string stringifyCSRegSet(const CSRegSet& s);
- void dumpSet(const CSRegSet& s);
- void dumpUsed(MachineBasicBlock* MBB);
- void dumpAllUsed();
- void dumpSets(MachineBasicBlock* MBB);
- void dumpSets1(MachineBasicBlock* MBB);
- void dumpAllSets();
- void dumpSRSets();
-#endif
-
};
} // End llvm namespace
#endif
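With the shrink-wrapping machinery gone, the pass keeps only the entry block, the return blocks, and a single calculateSets() step. A minimal, hedged sketch of the kind of predicate isReturnBlock() boils down to (illustrative only, not part of this patch; it assumes the usual MachineBasicBlock/MachineInstr API):

    // Hypothetical stand-in for PEI::isReturnBlock: a block "returns" if its
    // last instruction is a return.
    static bool isReturnBlockSketch(const MachineBasicBlock *MBB) {
      return MBB && !MBB->empty() && MBB->back().isReturn();
    }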
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index df3e12a..293e306 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -50,6 +50,9 @@ bool RegAllocBase::VerifyEnabled = false;
// RegAllocBase Implementation
//===----------------------------------------------------------------------===//
+// Pin the vtable to this file.
+void RegAllocBase::anchor() {}
+
void RegAllocBase::init(VirtRegMap &vrm,
LiveIntervals &lis,
LiveRegMatrix &mat) {
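The anchor() added above is LLVM's usual way of pinning a polymorphic class's vtable to a single translation unit. A hedged sketch of the idiom with illustrative names; the point is only that one virtual member is defined out of line:

    // Header: declare exactly one out-of-line virtual method.
    class SomeBase {
      virtual void anchor();     // never called; exists only to anchor the vtable
    public:
      virtual ~SomeBase() {}
    };

    // One .cpp defines it, so the vtable and type info are emitted there once
    // instead of weakly in every translation unit that includes the header.
    void SomeBase::anchor() {}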
@@ -99,14 +102,13 @@ void RegAllocBase::allocatePhysRegs() {
// result from splitting.
DEBUG(dbgs() << "\nselectOrSplit "
<< MRI->getRegClass(VirtReg->reg)->getName()
- << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n');
- typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ << ':' << *VirtReg << '\n');
+ typedef SmallVector<unsigned, 4> VirtRegVec;
VirtRegVec SplitVRegs;
unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
if (AvailablePhysReg == ~0u) {
// selectOrSplit failed to find a register!
- const char *Msg = "ran out of registers during register allocation";
// Probably caused by an inline asm.
MachineInstr *MI;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
@@ -114,9 +116,9 @@ void RegAllocBase::allocatePhysRegs() {
if (MI->isInlineAsm())
break;
if (MI)
- MI->emitError(Msg);
+ MI->emitError("inline assembly requires more registers than available");
else
- report_fatal_error(Msg);
+ report_fatal_error("ran out of registers during register allocation");
// Keep going after reporting the error.
VRM->assignVirt2Phys(VirtReg->reg,
RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
@@ -128,7 +130,7 @@ void RegAllocBase::allocatePhysRegs() {
for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
I != E; ++I) {
- LiveInterval *SplitVirtReg = *I;
+ LiveInterval *SplitVirtReg = &LIS->getInterval(*I);
assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index ccaabba..c17a8d9 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -38,7 +38,7 @@
#define LLVM_CODEGEN_REGALLOCBASE
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
namespace llvm {
@@ -57,6 +57,7 @@ class Spiller;
/// live range splitting. They must also override enqueue/dequeue to provide an
/// assignment order.
class RegAllocBase {
+ virtual void anchor();
protected:
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
@@ -90,7 +91,7 @@ protected:
// or new set of split live virtual registers. It is up to the splitter to
// converge quickly toward fully spilled live ranges.
virtual unsigned selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+ SmallVectorImpl<unsigned> &splitLVRs) = 0;
// Use this group name for NamedRegionTimer.
static const char TimerGroupName[];
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index d6a7d6f..6768e45 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -102,7 +102,7 @@ public:
}
virtual unsigned selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
+ SmallVectorImpl<unsigned> &SplitVRegs);
/// Perform register allocation.
virtual bool runOnMachineFunction(MachineFunction &mf);
@@ -111,7 +111,7 @@ public:
// that interfere with the most recently queried lvr. Return true if spilling
// was successful, and append any new spilled/split intervals to splitLVRs.
bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
+ SmallVectorImpl<unsigned> &SplitVRegs);
static char ID;
};
@@ -126,7 +126,6 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
- initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -143,7 +142,6 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
AU.addRequired<MachineBlockFrequencyInfo>();
@@ -168,7 +166,7 @@ void RABasic::releaseMemory() {
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ SmallVectorImpl<unsigned> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
SmallVector<LiveInterval*, 8> Intfs;
@@ -222,7 +220,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
// minimal, there is no value in caching them outside the scope of
// selectOrSplit().
unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ SmallVectorImpl<unsigned> &SplitVRegs) {
// Populate a list of physical register spill candidates.
SmallVector<unsigned, 8> PhysRegSpillCands;
@@ -279,6 +277,11 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
RegAllocBase::init(getAnalysis<VirtRegMap>(),
getAnalysis<LiveIntervals>(),
getAnalysis<LiveRegMatrix>());
+
+ calculateSpillWeightsAndHints(*LIS, *MF,
+ getAnalysis<MachineLoopInfo>(),
+ getAnalysis<MachineBlockFrequencyInfo>());
+
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
allocatePhysRegs();
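Across the basic, greedy, and PBQP allocators this merge drops the CalculateSpillWeights pass requirement and instead computes spill weights directly once the analyses are available. The call pattern, taken from the hunk above (the declaration is assumed to live in llvm/CodeGen/CalcSpillWeights.h):

    calculateSpillWeightsAndHints(*LIS, *MF,
                                  getAnalysis<MachineLoopInfo>(),
                                  getAnalysis<MachineBlockFrequencyInfo>());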
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 6617e50..e92dbd2 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -144,7 +144,7 @@ namespace {
// not be erased.
bool isBulkSpilling;
- enum {
+ enum LLVM_ENUM_INT_TYPE(unsigned) {
spillClean = 1,
spillDirty = 100,
spillImpossible = ~0u
@@ -298,7 +298,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
MachineInstr *DBG = LRIDbgValues[li];
const MDNode *MDPtr = DBG->getOperand(2).getMetadata();
- bool IsIndirect = DBG->getOperand(1).isImm(); // Register-indirect value?
+ bool IsIndirect = DBG->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
DebugLoc DL;
if (MI == MBB->end()) {
@@ -569,7 +569,10 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
}
// Nothing we can do. Report an error and keep going with a bad allocation.
- MI->emitError("ran out of registers during register allocation");
+ if (MI->isInlineAsm())
+ MI->emitError("inline assembly requires more registers than available");
+ else
+ MI->emitError("ran out of registers during register allocation");
definePhysReg(MI, *AO.begin(), regFree);
return assignVirtToPhysReg(VirtReg, *AO.begin());
}
@@ -856,7 +859,7 @@ void RAFast::AllocateBasicBlock() {
}
else {
// Modify DBG_VALUE now that the value is in a spill slot.
- bool IsIndirect = MI->getOperand(1).isImm();
+ bool IsIndirect = MI->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
const MDNode *MDPtr =
MI->getOperand(MI->getNumOperands()-1).getMetadata();
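LLVM_ENUM_INT_TYPE(unsigned) pins the enum to an unsigned underlying type on compilers that support fixed enum types, which keeps initializers such as spillImpossible = ~0u well defined. A hedged sketch of what the macro in llvm/Support/Compiler.h roughly expands to; the exact feature guards may differ:

    #if __has_feature(cxx_strong_enums) || (defined(_MSC_VER) && _MSC_VER >= 1600)
    # define LLVM_ENUM_INT_TYPE(intty) : intty   // C++11 fixed underlying type
    #else
    # define LLVM_ENUM_INT_TYPE(intty)           // older compilers: plain enum
    #endif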
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index f9e363b..c08d955 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -120,7 +120,9 @@ class RAGreedy : public MachineFunctionPass,
RS_Done
};
+#ifndef NDEBUG
static const char *const StageName[];
+#endif
// RegInfo - Keep additional information about each live range.
struct RegInfo {
@@ -147,7 +149,7 @@ class RAGreedy : public MachineFunctionPass,
void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
for (;Begin != End; ++Begin) {
- unsigned Reg = (*Begin)->reg;
+ unsigned Reg = *Begin;
if (ExtraRegInfo[Reg].Stage == RS_New)
ExtraRegInfo[Reg].Stage = NewStage;
}
@@ -220,7 +222,7 @@ class RAGreedy : public MachineFunctionPass,
/// class.
SmallVector<GlobalSplitCandidate, 32> GlobalCand;
- enum { NoCand = ~0u };
+ enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u };
/// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
/// NoCand which indicates the stack interval.
@@ -241,7 +243,7 @@ public:
virtual void enqueue(LiveInterval *LI);
virtual LiveInterval *dequeue();
virtual unsigned selectOrSplit(LiveInterval&,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<unsigned>&);
/// Perform register allocation.
virtual bool runOnMachineFunction(MachineFunction &mf);
@@ -265,22 +267,22 @@ private:
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
void evictInterference(LiveInterval&, unsigned,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<unsigned>&);
unsigned tryAssign(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<unsigned>&);
unsigned tryEvict(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
+ SmallVectorImpl<unsigned>&, unsigned = ~0u);
unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<unsigned>&);
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<unsigned>&);
unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<unsigned>&);
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<unsigned>&);
unsigned trySplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<LiveInterval*>&);
+ SmallVectorImpl<unsigned>&);
};
} // end anonymous namespace
@@ -313,7 +315,6 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
- initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -337,7 +338,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveDebugVariables>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
- AU.addRequired<CalculateSpillWeights>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -455,7 +455,7 @@ LiveInterval *RAGreedy::dequeue() {
/// tryAssign - Try to assign VirtReg to an available register.
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
AllocationOrder &Order,
- SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs) {
Order.rewind();
unsigned PhysReg;
while ((PhysReg = Order.next()))
@@ -638,7 +638,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
/// from being assigned to Physreg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs) {
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges will then only be
// evicted by a newer cascade, preventing infinite loops.
@@ -670,7 +670,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
"Cannot decrease cascade number, illegal eviction");
ExtraRegInfo[Intf->reg].Cascade = Cascade;
++NumEvicted;
- NewVRegs.push_back(Intf);
+ NewVRegs.push_back(Intf->reg);
}
}
@@ -680,7 +680,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
/// @return Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
AllocationOrder &Order,
- SmallVectorImpl<LiveInterval*> &NewVRegs,
+ SmallVectorImpl<unsigned> &NewVRegs,
unsigned CostPerUseLimit) {
NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
@@ -1125,7 +1125,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
- DebugVars->splitRegister(Reg, LREdit.regs());
+ DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
ExtraRegInfo.resize(MRI->getNumVirtRegs());
unsigned OrigBlocks = SA->getNumLiveBlocks();
@@ -1136,7 +1136,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// - Block-local splits are candidates for local splitting.
// - DCE leftovers should go back on the queue.
for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
- LiveInterval &Reg = *LREdit.get(i);
+ LiveInterval &Reg = LIS->getInterval(LREdit.get(i));
// Ignore old intervals from DCE.
if (getStage(Reg) != RS_New)
@@ -1170,7 +1170,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
}
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs) {
unsigned NumCands = 0;
unsigned BestCand = NoCand;
BlockFrequency BestCost;
@@ -1305,7 +1305,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// creates a lot of local live ranges, that will be split by tryLocalSplit if
/// they don't allocate.
unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs) {
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
@@ -1326,14 +1326,14 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SE->finish(&IntvMap);
// Tell LiveDebugVariables about the new ranges.
- DebugVars->splitRegister(Reg, LREdit.regs());
+ DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
ExtraRegInfo.resize(MRI->getNumVirtRegs());
// Sort out the new intervals created by splitting. The remainder interval
// goes straight to spilling, the new local ranges get to stay RS_New.
for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
- LiveInterval &LI = *LREdit.get(i);
+ LiveInterval &LI = LIS->getInterval(LREdit.get(i));
if (getStage(LI) == RS_New && IntvMap[i] == 0)
setStage(LI, RS_Spill);
}
@@ -1357,7 +1357,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// This is similar to spilling to a larger register class.
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs) {
// There is no point to this if there are no larger sub-classes.
if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
return 0;
@@ -1393,7 +1393,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
- DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
ExtraRegInfo.resize(MRI->getNumVirtRegs());
// Assign all new registers to RS_Spill. This was the last chance.
@@ -1464,9 +1464,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
// Add fixed interference.
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- const LiveInterval &LI = LIS->getRegUnit(*Units);
- LiveInterval::const_iterator I = LI.find(StartIdx);
- LiveInterval::const_iterator E = LI.end();
+ const LiveRange &LR = LIS->getRegUnit(*Units);
+ LiveRange::const_iterator I = LR.find(StartIdx);
+ LiveRange::const_iterator E = LR.end();
// Same loop as above. Mark any overlapped gaps as HUGE_VALF.
for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
@@ -1477,7 +1477,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
break;
for (; Gap != NumGaps; ++Gap) {
- GapWeight[Gap] = HUGE_VALF;
+ GapWeight[Gap] = llvm::huge_valf;
if (Uses[Gap+1].getBaseIndex() >= I->end)
break;
}
@@ -1491,7 +1491,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs) {
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
@@ -1583,7 +1583,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Remove any gaps with regmask clobbers.
if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
- GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+ GapWeight[RegMaskGaps[i]] = llvm::huge_valf;
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
@@ -1618,7 +1618,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Legally, without causing looping?
bool Legal = !ProgressRequired || NewGaps < NumGaps;
- if (Legal && MaxGap < HUGE_VALF) {
+ if (Legal && MaxGap < llvm::huge_valf) {
// Estimate the new spill weight. Each instruction reads or writes the
// register. Conservatively assume there are no read-modify-write
// instructions.
@@ -1685,7 +1685,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SE->useIntv(SegStart, SegStop);
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
- DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
// If the new range has the same number of instructions as before, mark it as
// RS_Split2 so the next split will be forced to make progress. Otherwise,
@@ -1698,8 +1698,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
if (IntvMap[i] == 1) {
- setStage(*LREdit.get(i), RS_Split2);
- DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
+ setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
+ DEBUG(dbgs() << PrintReg(LREdit.get(i)));
}
DEBUG(dbgs() << '\n');
}
@@ -1716,7 +1716,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<LiveInterval*>&NewVRegs) {
+ SmallVectorImpl<unsigned>&NewVRegs) {
// Ranges must be Split2 or less.
if (getStage(VirtReg) >= RS_Spill)
return 0;
@@ -1765,7 +1765,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
//===----------------------------------------------------------------------===//
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs) {
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
@@ -1790,7 +1790,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
if (Stage < RS_Split) {
setStage(VirtReg, RS_Split);
DEBUG(dbgs() << "wait for second round\n");
- NewVRegs.push_back(&VirtReg);
+ NewVRegs.push_back(VirtReg.reg);
return 0;
}
@@ -1838,6 +1838,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
+ calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);
+
DEBUG(LIS->dump());
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
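The HUGE_VALF uses become llvm::huge_valf, a library-provided float "infinity" used as an unbeatable gap cost, so the code no longer depends on the host's math.h macro. A hedged sketch of the idea only; the real definition lives in Support/MathExtras and may differ in detail:

    // MathExtras.h
    extern const float huge_valf;   // "infinite" spill weight / gap interference

    // MathExtras.cpp (conceptually)
    const float huge_valf = std::numeric_limits<float>::infinity();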
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 81ecca1..88c8201 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -95,7 +95,6 @@ public:
: MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
}
@@ -158,13 +157,13 @@ char RegAllocPBQP::ID = 0;
} // End anonymous namespace.
-unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::NodeId node) const {
Node2VReg::const_iterator vregItr = node2VReg.find(node);
assert(vregItr != node2VReg.end() && "No vreg for node.");
return vregItr->second;
}
-PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+PBQP::Graph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
assert(nodeItr != vreg2Node.end() && "No node for vreg.");
return nodeItr->second;
@@ -247,7 +246,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
}
// Construct the node.
- PBQP::Graph::NodeItr node =
+ PBQP::Graph::NodeId node =
g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
// Record the mapping and allowed set in the problem.
@@ -273,7 +272,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
assert(!l2.empty() && "Empty interval in vreg set?");
if (l1.overlaps(l2)) {
- PBQP::Graph::EdgeItr edge =
+ PBQP::Graph::EdgeId edge =
g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
@@ -364,16 +363,16 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
}
if (pregOpt < allowed.size()) {
++pregOpt; // +1 to account for spill option.
- PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+ PBQP::Graph::NodeId node = p->getNodeForVReg(src);
addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
}
} else {
const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
- PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
- PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
- PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
- if (edge == g.edgesEnd()) {
+ PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst);
+ PBQP::Graph::NodeId node2 = p->getNodeForVReg(src);
+ PBQP::Graph::EdgeId edge = g.findEdge(node1, node2);
+ if (edge == g.invalidEdgeId()) {
edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
allowed2->size() + 1,
0));
@@ -432,7 +431,6 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
//au.addRequiredID(SplitCriticalEdgesID);
if (customPassID)
au.addRequiredID(*customPassID);
- au.addRequired<CalculateSpillWeights>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
au.addRequired<MachineBlockFrequencyInfo>();
@@ -477,11 +475,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
const PBQP::Graph &g = problem.getGraph();
// Iterate over the nodes mapping the PBQP solution to a register
// assignment.
- for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
- nodeEnd = g.nodesEnd();
- node != nodeEnd; ++node) {
- unsigned vreg = problem.getVRegForNode(node);
- unsigned alloc = solution.getSelection(node);
+ for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(),
+ nodeEnd = g.nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+ unsigned vreg = problem.getVRegForNode(*nodeItr);
+ unsigned alloc = solution.getSelection(*nodeItr);
if (problem.isPRegOption(vreg, alloc)) {
unsigned preg = problem.getPRegForOption(vreg, alloc);
@@ -491,7 +489,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
vrm->assignVirt2Phys(vreg, preg);
} else if (problem.isSpillOption(vreg, alloc)) {
vregsToAlloc.erase(vreg);
- SmallVector<LiveInterval*, 8> newSpills;
+ SmallVector<unsigned, 8> newSpills;
LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
spiller->spill(LRE);
@@ -502,9 +500,10 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
// allocate.
for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
itr != end; ++itr) {
- assert(!(*itr)->empty() && "Empty spill range.");
- DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " ");
- vregsToAlloc.insert((*itr)->reg);
+ LiveInterval &li = lis->getInterval(*itr);
+ assert(!li.empty() && "Empty spill range.");
+ DEBUG(dbgs() << PrintReg(li.reg, tri) << " ");
+ vregsToAlloc.insert(li.reg);
}
DEBUG(dbgs() << ")\n");
@@ -550,6 +549,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
lss = &getAnalysis<LiveStacks>();
mbfi = &getAnalysis<MachineBlockFrequencyInfo>();
+ calculateSpillWeightsAndHints(*lis, MF, getAnalysis<MachineLoopInfo>(),
+ *mbfi);
+
vrm = &getAnalysis<VirtRegMap>();
spiller.reset(createInlineSpiller(*this, MF, *vrm));
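The PBQP graph API now hands out opaque NodeId/EdgeId handles instead of iterator types, and findEdge() signals a missing edge with invalidEdgeId() rather than edgesEnd(). The node walk above dereferences the iterator to obtain the id; a condensed sketch of that pattern with the same names as the hunk:

    for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd();
         nodeItr != nodeEnd; ++nodeItr) {
      PBQP::Graph::NodeId node = *nodeItr;            // iterators now yield ids
      unsigned vreg = problem.getVRegForNode(node);
      unsigned alloc = solution.getSelection(node);   // preg option or spill option
    }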
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index f99f1a3..dd86c1f 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -398,7 +398,7 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
}
void RegisterCoalescer::eliminateDeadDefs() {
- SmallVector<LiveInterval*, 8> NewRegs;
+ SmallVector<unsigned, 8> NewRegs;
LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
}
@@ -434,11 +434,11 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
- // BValNo is a value number in B that is defined by a copy from A. 'B3' in
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
// the example above.
- LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
- if (BLR == IntB.end()) return false;
- VNInfo *BValNo = BLR->valno;
+ LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
+ if (BS == IntB.end()) return false;
+ VNInfo *BValNo = BS->valno;
// Get the location that B is defined at. Two options: either this value has
// an unknown definition point or it is defined at CopyIdx. If unknown, we
@@ -447,10 +447,10 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// AValNo is the value number in A that defines the copy, A3 in the example.
SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
- LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
- // The live range might not exist after fun with physreg coalescing.
- if (ALR == IntA.end()) return false;
- VNInfo *AValNo = ALR->valno;
+ LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
+ // The live segment might not exist after fun with physreg coalescing.
+ if (AS == IntA.end()) return false;
+ VNInfo *AValNo = AS->valno;
// If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
@@ -459,54 +459,54 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
return false;
- // Get the LiveRange in IntB that this value number starts with.
- LiveInterval::iterator ValLR =
- IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
- if (ValLR == IntB.end())
+ // Get the Segment in IntB that this value number starts with.
+ LiveInterval::iterator ValS =
+ IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
+ if (ValS == IntB.end())
return false;
- // Make sure that the end of the live range is inside the same block as
+ // Make sure that the end of the live segment is inside the same block as
// CopyMI.
- MachineInstr *ValLREndInst =
- LIS->getInstructionFromIndex(ValLR->end.getPrevSlot());
- if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ MachineInstr *ValSEndInst =
+ LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
+ if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
return false;
- // Okay, we now know that ValLR ends in the same block that the CopyMI
- // live-range starts. If there are no intervening live ranges between them in
- // IntB, we can merge them.
- if (ValLR+1 != BLR) return false;
+ // Okay, we now know that ValS ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live segments between them
+ // in IntB, we can merge them.
+ if (ValS+1 != BS) return false;
DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
- SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
// that defines this value #'. Update the valnum with the new defining
// instruction #.
BValNo->def = FillerStart;
// Okay, we can merge them. We need to insert a new liverange:
- // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // [ValS.end, BS.begin) of either value number, then we merge the
// two value numbers.
- IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+ IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
// Okay, merge "B1" into the same value number as "B0".
- if (BValNo != ValLR->valno)
- IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ if (BValNo != ValS->valno)
+ IntB.MergeValueNumberInto(BValNo, ValS->valno);
DEBUG(dbgs() << " result = " << IntB << '\n');
// If the source instruction was killing the source register before the
// merge, unset the isKill marker given the live range has been extended.
- int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
if (UIdx != -1) {
- ValLREndInst->getOperand(UIdx).setIsKill(false);
+ ValSEndInst->getOperand(UIdx).setIsKill(false);
}
// Rewrite the copy. If the copy instruction was killing the destination
// register before the merge, find the last use and trim the live range. That
// will also add the isKill marker.
CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
- if (ALR->end == CopyIdx)
+ if (AS->end == CopyIdx)
LIS->shrinkToUses(&IntA);
++numExtends;
@@ -527,11 +527,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
- LiveInterval::Ranges::iterator BI =
- std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
- if (BI != IntB.ranges.begin())
+ LiveInterval::iterator BI =
+ std::upper_bound(IntB.begin(), IntB.end(), AI->start);
+ if (BI != IntB.begin())
--BI;
- for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ for (; BI != IntB.end() && AI->end >= BI->start; ++BI) {
if (BI->valno == BValNo)
continue;
if (BI->start <= AI->start && BI->end > AI->start)
@@ -577,14 +577,12 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
LiveInterval &IntB =
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
- // BValNo is a value number in B that is defined by a copy from A. 'B3' in
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
// the example above.
VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
if (!BValNo || BValNo->def != CopyIdx)
return false;
- assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
-
// AValNo is the value number in A that defines the copy, A3 in the example.
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && "COPY source not live");
@@ -614,7 +612,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
- if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
+ if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
return false;
// Make sure there are no other definitions of IntB that would reach the
@@ -629,8 +627,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
- LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
- if (ULR == IntA.end() || ULR->valno != AValNo)
+ LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
+ if (US == IntA.end() || US->valno != AValNo)
continue;
// If this use is tied to a def, we can't rewrite the register.
if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
@@ -681,8 +679,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
}
SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
- LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
- if (ULR == IntA.end() || ULR->valno != AValNo)
+ LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
+ if (US == IntA.end() || US->valno != AValNo)
continue;
// Kill flags are no longer accurate. They are recomputed after RA.
UseMO.setIsKill(false);
@@ -712,14 +710,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
UseMI->eraseFromParent();
}
- // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
// is updated.
VNInfo *ValNo = BValNo;
ValNo->def = AValNo->def;
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
- IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+ IntB.addSegment(LiveInterval::Segment(AI->start, AI->end, ValNo));
}
DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
@@ -744,7 +742,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI);
- VNInfo *ValNo = LiveRangeQuery(SrcInt, CopyIdx).valueIn();
+ VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
assert(ValNo && "CopyMI input register not live");
if (ValNo->isPHIDef() || ValNo->isUnused())
return false;
@@ -876,8 +874,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
unsigned Reg = NewMIImplDefs[i];
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
- if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
- LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+ if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+ LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
DEBUG(dbgs() << "Remat: " << *NewMI);
@@ -1048,7 +1046,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (CP.getSrcReg() == CP.getDstReg()) {
LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
- LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(CopyMI));
if (VNInfo *DefVNI = LRQ.valueDefined()) {
VNInfo *ReadVNI = LRQ.valueIn();
assert(ReadVNI && "No value before copy and no <undef> flag.");
@@ -1091,8 +1089,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
});
// When possible, let DstReg be the larger interval.
- if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
- LIS->getInterval(CP.getDstReg()).ranges.size())
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
+ LIS->getInterval(CP.getDstReg()).size())
CP.flip();
}
@@ -1109,7 +1107,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
return true;
- // If we can eliminate the copy without merging the live ranges, do so now.
+ // If we can eliminate the copy without merging the live segments, do so
+ // now.
if (!CP.isPartial() && !CP.isPhys()) {
if (adjustCopiesBackFrom(CP, CopyMI) ||
removeCopyByCommutingDef(CP, CopyMI)) {
@@ -1157,10 +1156,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
- dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
- if (!CP.isPhys())
+ dbgs() << "\tJoined. Result = ";
+ if (CP.isPhys())
+ dbgs() << PrintReg(CP.getDstReg(), TRI);
+ else
dbgs() << LIS->getInterval(CP.getDstReg());
- dbgs() << '\n';
+ dbgs() << '\n';
});
++numJoins;
@@ -1172,8 +1173,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
assert(CP.isPhys() && "Must be a physreg copy");
assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
- DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
- << '\n');
+ DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
assert(CP.isFlipped() && RHS.containsOneValue() &&
"Invalid join with reserved register");
@@ -1442,7 +1442,7 @@ VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
unsigned Reg = MI->getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
break;
- LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
+ LiveQueryResult LRQ = LIS->getInterval(Reg).Query(VNI->def);
if (!LRQ.valueIn())
break;
VNI = LRQ.valueIn();
@@ -1493,7 +1493,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// The <read-undef> flag on the def operand means that old lane values are
// not important.
if (Redef) {
- V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
+ V.RedefVNI = LI.Query(VNI->def).valueIn();
assert(V.RedefVNI && "Instruction is reading nonexistent value");
computeAssignment(V.RedefVNI->id, Other);
V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
@@ -1510,7 +1510,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
}
// Find the value in Other that overlaps VNI->def, if any.
- LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+ LiveQueryResult OtherLRQ = Other.LI.Query(VNI->def);
// It is possible that both values are defined by the same instruction, or
// the values are PHIs defined in the same block. When that happens, the two
@@ -1969,8 +1969,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
- DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
- << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
+ DEBUG(dbgs() << "\t\tRHS = " << RHS
+ << "\n\t\tLHS = " << LHS
<< '\n');
// First compute NewVNInfo and the simple value mappings.
@@ -2001,8 +2001,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
// Join RHS into LHS.
- LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
- MRI);
+ LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
// Kill flags are going to be wrong if the live ranges were overlapping.
// Eventually, we should simply clear all kill flags when computing live
@@ -2017,7 +2016,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// CR_Replace conflicts.
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
<< " points: " << LHS << '\n');
- LIS->extendToIndices(&LHS, EndPoints);
+ LIS->extendToIndices(LHS, EndPoints);
return true;
}
@@ -2043,9 +2042,8 @@ struct MBBPriorityInfo {
// block (the unsigned), and then on the MBB number.
//
// EnableGlobalCopies assumes that the primary sort key is loop depth.
-static int compareMBBPriority(const void *L, const void *R) {
- const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
- const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+static int compareMBBPriority(const MBBPriorityInfo *LHS,
+ const MBBPriorityInfo *RHS) {
// Deeper loops first
if (LHS->Depth != RHS->Depth)
return LHS->Depth > RHS->Depth ? -1 : 1;
@@ -2203,7 +2201,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
if (EnableGlobalCopies == cl::BOU_UNSET)
- JoinGlobalCopies = ST.enableMachineScheduler();
+ JoinGlobalCopies = ST.useMachineScheduler();
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
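RegisterCoalescer is updated for the LiveInterval API rename: segments instead of ranges (FindSegmentContaining, addSegment, LiveInterval::Segment) and liveness queries through LiveInterval::Query() returning a LiveQueryResult instead of constructing a LiveRangeQuery. A condensed, hedged sketch of the query side as it is used in the hunks above:

    LiveInterval &LI = LIS->getInterval(Reg);
    LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(MI));
    if (LRQ.isKill())                     { /* Reg's last use is at this index  */ }
    if (VNInfo *In = LRQ.valueIn())       { /* value live into the instruction  */ }
    if (VNInfo *Def = LRQ.valueDefined()) { /* value defined by the instruction */ }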
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index b7ab138..092ecdd 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -25,53 +25,19 @@ using namespace llvm;
/// Increase pressure for each pressure set provided by TargetRegisterInfo.
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
- std::vector<unsigned> &MaxSetPressure,
- const int *PSet, unsigned Weight) {
- for (; *PSet != -1; ++PSet) {
- CurrSetPressure[*PSet] += Weight;
- if (&CurrSetPressure != &MaxSetPressure
- && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
- MaxSetPressure[*PSet] = CurrSetPressure[*PSet];
- }
- }
+ PSetIterator PSetI) {
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI)
+ CurrSetPressure[*PSetI] += Weight;
}
/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
- const int *PSet, unsigned Weight) {
- for (; *PSet != -1; ++PSet) {
- assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
- CurrSetPressure[*PSet] -= Weight;
- }
-}
-
-/// Directly increase pressure only within this RegisterPressure result.
-void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI,
- const MachineRegisterInfo *MRI) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- increaseSetPressure(MaxSetPressure, MaxSetPressure,
- TRI->getRegClassPressureSets(RC),
- TRI->getRegClassWeight(RC).RegWeight);
- }
- else {
- increaseSetPressure(MaxSetPressure, MaxSetPressure,
- TRI->getRegUnitPressureSets(Reg),
- TRI->getRegUnitWeight(Reg));
- }
-}
-
-/// Directly decrease pressure only within this RegisterPressure result.
-void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI,
- const MachineRegisterInfo *MRI) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC),
- TRI->getRegClassWeight(RC).RegWeight);
- }
- else {
- decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg),
- TRI->getRegUnitWeight(Reg));
+ PSetIterator PSetI) {
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow");
+ CurrSetPressure[*PSetI] -= Weight;
}
}
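Both helpers now take a PSetIterator, which MachineRegisterInfo::getPressureSets() produces for either a virtual register or a register unit, folding the old vreg/physreg split (getRegClassPressureSets vs. getRegUnitPressureSets) into one walk that also carries the weight. Usage, as in the hunk above:

    PSetIterator PSetI = MRI->getPressureSets(RegUnit);
    unsigned Weight = PSetI.getWeight();
    for (; PSetI.isValid(); ++PSetI)
      CurrSetPressure[*PSetI] += Weight;   // bump every pressure set this reg maps to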
@@ -113,36 +79,23 @@ void RegPressureTracker::dump() const {
/// Increase the current pressure as impacted by these registers and bump
/// the high water mark if needed.
-void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
- if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
- const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
- increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
- TRI->getRegClassPressureSets(RC),
- TRI->getRegClassWeight(RC).RegWeight);
- }
- else {
- increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
- TRI->getRegUnitPressureSets(Regs[I]),
- TRI->getRegUnitWeight(Regs[I]));
+void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]);
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ CurrSetPressure[*PSetI] += Weight;
+ if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) {
+ P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI];
+ }
}
}
}
/// Simply decrease the current pressure as impacted by these registers.
-void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
- if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
- const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
- decreaseSetPressure(CurrSetPressure,
- TRI->getRegClassPressureSets(RC),
- TRI->getRegClassWeight(RC).RegWeight);
- }
- else {
- decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]),
- TRI->getRegUnitWeight(Regs[I]));
- }
- }
+void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) {
+ for (unsigned I = 0, E = RegUnits.size(); I != E; ++I)
+ decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I]));
}
/// Clear the result so it can be used for another round of pressure tracking.
@@ -194,12 +147,30 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
LiveInRegs.clear();
}
-const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const {
+const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const {
if (TargetRegisterInfo::isVirtualRegister(Reg))
return &LIS->getInterval(Reg);
return LIS->getCachedRegUnit(Reg);
}
+void RegPressureTracker::reset() {
+ MBB = 0;
+ LIS = 0;
+
+ CurrSetPressure.clear();
+ LiveThruPressure.clear();
+ P.MaxSetPressure.clear();
+
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).reset();
+ else
+ static_cast<RegionPressure&>(P).reset();
+
+ LiveRegs.PhysRegs.clear();
+ LiveRegs.VirtRegs.clear();
+ UntiedDefs.clear();
+}
+
/// Setup the RegPressureTracker.
///
/// TODO: Add support for pressure without LiveIntervals.
@@ -210,6 +181,8 @@ void RegPressureTracker::init(const MachineFunction *mf,
MachineBasicBlock::const_iterator pos,
bool ShouldTrackUntiedDefs)
{
+ reset();
+
MF = mf;
TRI = MF->getTarget().getRegisterInfo();
RCI = rci;
@@ -224,19 +197,11 @@ void RegPressureTracker::init(const MachineFunction *mf,
CurrPos = pos;
CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
- LiveThruPressure.clear();
- if (RequireIntervals)
- static_cast<IntervalPressure&>(P).reset();
- else
- static_cast<RegionPressure&>(P).reset();
P.MaxSetPressure = CurrSetPressure;
- LiveRegs.PhysRegs.clear();
LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
- LiveRegs.VirtRegs.clear();
LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
- UntiedDefs.clear();
if (TrackUntiedDefs)
UntiedDefs.setUniverse(MRI->getNumVirtRegs());
}
@@ -328,24 +293,25 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
unsigned Reg = P.LiveOutRegs[i];
if (TargetRegisterInfo::isVirtualRegister(Reg)
&& !RPTracker.hasUntiedDef(Reg)) {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- increaseSetPressure(LiveThruPressure, LiveThruPressure,
- TRI->getRegClassPressureSets(RC),
- TRI->getRegClassWeight(RC).RegWeight);
+ increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg));
}
}
}
/// \brief Convenient wrapper for checking membership in RegisterOperands.
-static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) {
- return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
+/// (std::count() doesn't have an early exit).
+static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
+ return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end();
}
/// Collect this instruction's unique uses and defs into SmallVectors for
/// processing defs and uses in order.
+///
+/// FIXME: always ignore tied opers
class RegisterOperands {
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
+ bool IgnoreDead;
public:
SmallVector<unsigned, 8> Uses;
@@ -353,7 +319,8 @@ public:
SmallVector<unsigned, 8> DeadDefs;
RegisterOperands(const TargetRegisterInfo *tri,
- const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {}
+ const MachineRegisterInfo *mri, bool ID = false):
+ TRI(tri), MRI(mri), IgnoreDead(ID) {}
/// Push this operand's register onto the correct vector.
void collect(const MachineOperand &MO) {
@@ -362,25 +329,27 @@ public:
if (MO.readsReg())
pushRegUnits(MO.getReg(), Uses);
if (MO.isDef()) {
- if (MO.isDead())
- pushRegUnits(MO.getReg(), DeadDefs);
+ if (MO.isDead()) {
+ if (!IgnoreDead)
+ pushRegUnits(MO.getReg(), DeadDefs);
+ }
else
pushRegUnits(MO.getReg(), Defs);
}
}
protected:
- void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) {
+ void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- if (containsReg(Regs, Reg))
+ if (containsReg(RegUnits, Reg))
return;
- Regs.push_back(Reg);
+ RegUnits.push_back(Reg);
}
else if (MRI->isAllocatable(Reg)) {
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (containsReg(Regs, *Units))
+ if (containsReg(RegUnits, *Units))
continue;
- Regs.push_back(*Units);
+ RegUnits.push_back(*Units);
}
}
}
@@ -399,6 +368,56 @@ static void collectOperands(const MachineInstr *MI,
RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
}
+/// Initialize an array of N PressureDiffs.
+void PressureDiffs::init(unsigned N) {
+ Size = N;
+ if (N <= Max) {
+ memset(PDiffArray, 0, N * sizeof(PressureDiff));
+ return;
+ }
+ Max = Size;
+ free(PDiffArray);
+ PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff)));
+}
+
+/// Add a change in pressure to the pressure diff of a given instruction.
+void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
+ const MachineRegisterInfo *MRI) {
+ PSetIterator PSetI = MRI->getPressureSets(RegUnit);
+ int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ // Find an existing entry in the pressure diff for this PSet.
+ PressureDiff::iterator I = begin(), E = end();
+ for (; I != E && I->isValid(); ++I) {
+ if (I->getPSet() >= *PSetI)
+ break;
+ }
+ // If all pressure sets are more constrained, skip the remaining PSets.
+ if (I == E)
+ break;
+ // Insert this PressureChange.
+ if (!I->isValid() || I->getPSet() != *PSetI) {
+ PressureChange PTmp = PressureChange(*PSetI);
+ for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J)
+ std::swap(*J,PTmp);
+ }
+ // Update the units for this pressure set.
+ I->setUnitInc(I->getUnitInc() + Weight);
+ }
+}
+
+/// Record the pressure difference induced by the given operand list.
+static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers,
+ const MachineRegisterInfo *MRI) {
+ assert(!PDiff.begin()->isValid() && "stale PDiff");
+
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i)
+ PDiff.addPressureChange(RegOpers.Defs[i], true, MRI);
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i)
+ PDiff.addPressureChange(RegOpers.Uses[i], false, MRI);
+}
+
/// Force liveness of registers.
void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
@@ -415,7 +434,7 @@ void RegPressureTracker::discoverLiveIn(unsigned Reg) {
// At live in discovery, unconditionally increase the high water mark.
P.LiveInRegs.push_back(Reg);
- P.increase(Reg, TRI, MRI);
+ increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg));
}
/// Add Reg to the live out set and increase max pressure.
@@ -426,11 +445,16 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) {
// At live out discovery, unconditionally increase the high water mark.
P.LiveOutRegs.push_back(Reg);
- P.increase(Reg, TRI, MRI);
+ increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg));
}
-/// Recede across the previous instruction.
-bool RegPressureTracker::recede() {
+/// Recede across the previous instruction. If LiveUses is provided, record any
+/// RegUnits that are made live by the current instruction's uses. This includes
+/// registers that are both defined and used by the instruction. If a pressure
+/// difference pointer is provided, record the changes in pressure caused by this
+/// instruction independent of liveness.
+bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
+ PressureDiff *PDiff) {
// Check for the top of the analyzable region.
if (CurrPos == MBB->begin()) {
closeRegion();
@@ -463,6 +487,9 @@ bool RegPressureTracker::recede() {
RegisterOperands RegOpers(TRI, MRI);
collectOperands(CurrPos, RegOpers);
+ if (PDiff)
+ collectPDiff(*PDiff, RegOpers, MRI);
+
// Boost pressure for all dead defs together.
increaseRegPressure(RegOpers.DeadDefs);
decreaseRegPressure(RegOpers.DeadDefs);
@@ -471,10 +498,20 @@ bool RegPressureTracker::recede() {
// TODO: consider earlyclobbers?
for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
unsigned Reg = RegOpers.Defs[i];
- if (LiveRegs.erase(Reg))
- decreaseRegPressure(Reg);
- else
- discoverLiveOut(Reg);
+ bool DeadDef = false;
+ if (RequireIntervals) {
+ const LiveRange *LR = getLiveRange(Reg);
+ if (LR) {
+ LiveQueryResult LRQ = LR->Query(SlotIdx);
+ DeadDef = LRQ.isDeadDef();
+ }
+ }
+ if (!DeadDef) {
+ if (LiveRegs.erase(Reg))
+ decreaseRegPressure(Reg);
+ else
+ discoverLiveOut(Reg);
+ }
}
// Generate liveness for uses.
@@ -483,12 +520,17 @@ bool RegPressureTracker::recede() {
if (!LiveRegs.contains(Reg)) {
// Adjust liveouts if LiveIntervals are available.
if (RequireIntervals) {
- const LiveInterval *LI = getInterval(Reg);
- if (LI && !LI->killedAt(SlotIdx))
- discoverLiveOut(Reg);
+ const LiveRange *LR = getLiveRange(Reg);
+ if (LR) {
+ LiveQueryResult LRQ = LR->Query(SlotIdx);
+ if (!LRQ.isKill() && !LRQ.valueDefined())
+ discoverLiveOut(Reg);
+ }
}
increaseRegPressure(Reg);
LiveRegs.insert(Reg);
+ if (LiveUses && !containsReg(*LiveUses, Reg))
+ LiveUses->push_back(Reg);
}
}
if (TrackUntiedDefs) {
@@ -537,8 +579,8 @@ bool RegPressureTracker::advance() {
// Kill liveness at last uses.
bool lastUse = false;
if (RequireIntervals) {
- const LiveInterval *LI = getInterval(Reg);
- lastUse = LI && LI->killedAt(SlotIdx);
+ const LiveRange *LR = getLiveRange(Reg);
+ lastUse = LR && LR->Query(SlotIdx).isKill();
}
else {
// Allocatable physregs are always single-use before register rewriting.
@@ -576,8 +618,7 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
RegPressureDelta &Delta,
const RegisterClassInfo *RCI,
ArrayRef<unsigned> LiveThruPressureVec) {
- int ExcessUnits = 0;
- unsigned PSetID = ~0U;
+ Delta.Excess = PressureChange();
for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) {
unsigned POld = OldPressureVec[i];
unsigned PNew = NewPressureVec[i];
@@ -599,13 +640,11 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
PDiff = Limit - POld; // Just obeyed limit.
if (PDiff) {
- ExcessUnits = PDiff;
- PSetID = i;
+ Delta.Excess = PressureChange(i);
+ Delta.Excess.setUnitInc(PDiff);
break;
}
}
- Delta.Excess.PSetID = PSetID;
- Delta.Excess.UnitIncrease = ExcessUnits;
}
/// Find the max change in max pressure that either surpasses a critical PSet
@@ -616,11 +655,11 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
/// RegPressureTracker API change to work with pressure differences.
static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
ArrayRef<unsigned> NewMaxPressureVec,
- ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<PressureChange> CriticalPSets,
ArrayRef<unsigned> MaxPressureLimit,
RegPressureDelta &Delta) {
- Delta.CriticalMax = PressureElement();
- Delta.CurrentMax = PressureElement();
+ Delta.CriticalMax = PressureChange();
+ Delta.CurrentMax = PressureChange();
unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
@@ -630,27 +669,24 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
continue;
if (!Delta.CriticalMax.isValid()) {
- while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i)
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < i)
++CritIdx;
- if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) {
- int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease;
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) {
+ int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc();
if (PDiff > 0) {
- Delta.CriticalMax.PSetID = i;
- Delta.CriticalMax.UnitIncrease = PDiff;
+ Delta.CriticalMax = PressureChange(i);
+ Delta.CriticalMax.setUnitInc(PDiff);
}
}
}
// Find the first increase above MaxPressureLimit.
// (Ignores negative MDiff).
- if (!Delta.CurrentMax.isValid()) {
- int MDiff = (int)PNew - (int)MaxPressureLimit[i];
- if (MDiff > 0) {
- Delta.CurrentMax.PSetID = i;
- Delta.CurrentMax.UnitIncrease = MDiff;
- if (CritIdx == CritEnd || Delta.CriticalMax.isValid())
- break;
- }
+ if (!Delta.CurrentMax.isValid() && PNew > MaxPressureLimit[i]) {
+ Delta.CurrentMax = PressureChange(i);
+ Delta.CurrentMax.setUnitInc(PNew - POld);
+ if (CritIdx == CritEnd || Delta.CriticalMax.isValid())
+ break;
}
}
}
@@ -665,7 +701,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
// Account for register pressure similar to RegPressureTracker::recede().
- RegisterOperands RegOpers(TRI, MRI);
+ RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true);
collectOperands(MI, RegOpers);
// Boost max pressure for all dead defs together.
@@ -676,8 +712,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Kill liveness at live defs.
for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
unsigned Reg = RegOpers.Defs[i];
- if (!containsReg(RegOpers.Uses, Reg))
- decreaseRegPressure(Reg);
+ bool DeadDef = false;
+ if (RequireIntervals) {
+ const LiveRange *LR = getLiveRange(Reg);
+ if (LR) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
+ LiveQueryResult LRQ = LR->Query(SlotIdx);
+ DeadDef = LRQ.isDeadDef();
+ }
+ }
+ if (!DeadDef) {
+ if (!containsReg(RegOpers.Uses, Reg))
+ decreaseRegPressure(Reg);
+ }
}
// Generate liveness for uses.
for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
@@ -699,8 +746,9 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
/// result per-SUnit with enough information to adjust for the current
/// scheduling position. But this works as a proof of concept.
void RegPressureTracker::
-getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
- ArrayRef<PressureElement> CriticalPSets,
+getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff,
+ RegPressureDelta &Delta,
+ ArrayRef<PressureChange> CriticalPSets,
ArrayRef<unsigned> MaxPressureLimit) {
// Snapshot Pressure.
// FIXME: The snapshot heap space should persist. But I'm planning to
@@ -714,12 +762,113 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
LiveThruPressure);
computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
MaxPressureLimit, Delta);
- assert(Delta.CriticalMax.UnitIncrease >= 0 &&
- Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+ assert(Delta.CriticalMax.getUnitInc() >= 0 &&
+ Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure");
// Restore the tracker's state.
P.MaxSetPressure.swap(SavedMaxPressure);
CurrSetPressure.swap(SavedPressure);
+
+#ifndef NDEBUG
+ if (!PDiff)
+ return;
+
+ // Check if the alternate algorithm yields the same result.
+ RegPressureDelta Delta2;
+ getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit);
+ if (Delta != Delta2) {
+ dbgs() << "DELTA: " << *MI;
+ if (Delta.Excess.isValid())
+ dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet())
+ << " " << Delta.Excess.getUnitInc() << "\n";
+ if (Delta.CriticalMax.isValid())
+ dbgs() << "Critic1 " << TRI->getRegPressureSetName(Delta.CriticalMax.getPSet())
+ << " " << Delta.CriticalMax.getUnitInc() << "\n";
+ if (Delta.CurrentMax.isValid())
+ dbgs() << "CurrMx1 " << TRI->getRegPressureSetName(Delta.CurrentMax.getPSet())
+ << " " << Delta.CurrentMax.getUnitInc() << "\n";
+ if (Delta2.Excess.isValid())
+ dbgs() << "Excess2 " << TRI->getRegPressureSetName(Delta2.Excess.getPSet())
+ << " " << Delta2.Excess.getUnitInc() << "\n";
+ if (Delta2.CriticalMax.isValid())
+ dbgs() << "Critic2 " << TRI->getRegPressureSetName(Delta2.CriticalMax.getPSet())
+ << " " << Delta2.CriticalMax.getUnitInc() << "\n";
+ if (Delta2.CurrentMax.isValid())
+ dbgs() << "CurrMx2 " << TRI->getRegPressureSetName(Delta2.CurrentMax.getPSet())
+ << " " << Delta2.CurrentMax.getUnitInc() << "\n";
+ llvm_unreachable("RegP Delta Mismatch");
+ }
+#endif
+}
+
+/// This is a prototype of the fast version of querying register pressure that
+/// does not directly depend on current liveness. It's still slow because we
+/// recompute pressure change on-the-fly. This implementation only exists to
+/// prove correctness.
+///
+/// @param Delta captures information needed for heuristics.
+///
+/// @param CriticalPSets Are the pressure sets that are known to exceed some
+/// limit within the region, not necessarily at the current position.
+///
+/// @param MaxPressureLimit Is the max pressure within the region, not
+/// necessarily at the current position.
+void RegPressureTracker::
+getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
+ RegPressureDelta &Delta,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) const {
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (PressureDiff::const_iterator
+ PDiffI = PDiff.begin(), PDiffE = PDiff.end();
+ PDiffI != PDiffE && PDiffI->isValid(); ++PDiffI) {
+
+ unsigned PSetID = PDiffI->getPSet();
+ unsigned Limit = RCI->getRegPressureSetLimit(PSetID);
+ if (!LiveThruPressure.empty())
+ Limit += LiveThruPressure[PSetID];
+
+ unsigned POld = CurrSetPressure[PSetID];
+ unsigned MOld = P.MaxSetPressure[PSetID];
+ unsigned MNew = MOld;
+ // Ignore DeadDefs here because they aren't captured by PressureChange.
+ unsigned PNew = POld + PDiffI->getUnitInc();
+ assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow");
+ if (PNew > MOld)
+ MNew = PNew;
+ // Check if current pressure has exceeded the limit.
+ if (!Delta.Excess.isValid()) {
+ unsigned ExcessInc = 0;
+ if (PNew > Limit)
+ ExcessInc = POld > Limit ? PNew - POld : PNew - Limit;
+ else if (POld > Limit)
+ ExcessInc = Limit - POld;
+ if (ExcessInc) {
+ Delta.Excess = PressureChange(PSetID);
+ Delta.Excess.setUnitInc(ExcessInc);
+ }
+ }
+ // Check if max pressure has exceeded a critical pressure set max.
+ if (MNew == MOld)
+ continue;
+ if (!Delta.CriticalMax.isValid()) {
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < PSetID)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
+ int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
+ if (CritInc > 0 && CritInc <= INT16_MAX) {
+ Delta.CriticalMax = PressureChange(PSetID);
+ Delta.CriticalMax.setUnitInc(CritInc);
+ }
+ }
+ }
+ // Check if max pressure has exceeded the current max.
+ if (!Delta.CurrentMax.isValid() && MNew > MaxPressureLimit[PSetID]) {
+ Delta.CurrentMax = PressureChange(PSetID);
+ Delta.CurrentMax.setUnitInc(MNew - MOld);
+ }
+ }
}
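
For context, a minimal sketch (not part of this patch) of how a scheduling
heuristic might consume the delta API above; RPTracker, TRI, CriticalPSets and
MaxPressureLimit are assumed to be set up by the surrounding scheduler code:

    // Ask how scheduling MI at the current (upward) position changes pressure.
    RegPressureDelta Delta;
    RPTracker.getMaxUpwardPressureDelta(MI, /*PDiff=*/0, Delta,
                                        CriticalPSets, MaxPressureLimit);
    if (Delta.Excess.isValid())
      dbgs() << "set " << TRI->getRegPressureSetName(Delta.Excess.getPSet())
             << " would exceed its limit by " << Delta.Excess.getUnitInc()
             << " units\n";
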
/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
@@ -765,10 +914,12 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
// FIXME: allow the caller to pass in the list of vreg uses that remain
// to be bottom-scheduled to avoid searching uses at each query.
SlotIndex CurrIdx = getCurrSlot();
- const LiveInterval *LI = getInterval(Reg);
- if (LI && LI->killedAt(SlotIdx)
- && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
- decreaseRegPressure(Reg);
+ const LiveRange *LR = getLiveRange(Reg);
+ if (LR) {
+ LiveQueryResult LRQ = LR->Query(SlotIdx);
+ if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
+ decreaseRegPressure(Reg);
+ }
}
}
else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -793,7 +944,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
/// This assumes that the current LiveIn set is sufficient.
void RegPressureTracker::
getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
- ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<PressureChange> CriticalPSets,
ArrayRef<unsigned> MaxPressureLimit) {
// Snapshot Pressure.
std::vector<unsigned> SavedPressure = CurrSetPressure;
@@ -805,8 +956,8 @@ getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
LiveThruPressure);
computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
MaxPressureLimit, Delta);
- assert(Delta.CriticalMax.UnitIncrease >= 0 &&
- Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+ assert(Delta.CriticalMax.getUnitInc() >= 0 &&
+ Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure");
// Restore the tracker's state.
P.MaxSetPressure.swap(SavedMaxPressure);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 892903c..7f1f9c4 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -36,6 +36,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <queue>
+
using namespace llvm;
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
@@ -178,14 +180,11 @@ void ScheduleDAGInstrs::finishBlock() {
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
- unsigned endcount) {
+ unsigned regioninstrs) {
assert(bb == BB && "startBlock should set BB");
RegionBegin = begin;
RegionEnd = end;
- EndIndex = endcount;
- MISUnitMap.clear();
-
- ScheduleDAG::clearDAG();
+ NumRegionInstrs = regioninstrs;
}
/// Close the current scheduling region. Don't clear any state in case the
@@ -405,9 +404,19 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
+ // Record this local VReg use.
+ VReg2UseMap::iterator UI = VRegUses.find(Reg);
+ for (; UI != VRegUses.end(); ++UI) {
+ if (UI->SU == SU)
+ break;
+ }
+ if (UI == VRegUses.end())
+ VRegUses.insert(VReg2SUnit(Reg, SU));
+
// Lookup this operand's reaching definition.
assert(LIS && "vreg dependencies requires LiveIntervals");
- LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
+ LiveQueryResult LRQ
+ = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
VNInfo *VNI = LRQ.valueIn();
// VNI will be valid because MachineOperand::readsReg() is checked by caller.
@@ -635,8 +644,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
bool isNormalMemory = false) {
// If this is a false dependency,
  // do not add the edge, but remember the rejected node.
- if (!EnableAASchedMI ||
- MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
@@ -664,7 +672,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
void ScheduleDAGInstrs::initSUnits() {
// We'll be allocating one SUnit for each real instruction in the region,
// which is contained within a basic block.
- SUnits.reserve(BB->size());
+ SUnits.reserve(NumRegionInstrs);
for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
MachineInstr *MI = I;
@@ -686,10 +694,22 @@ void ScheduleDAGInstrs::initSUnits() {
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
- RegPressureTracker *RPTracker) {
+ RegPressureTracker *RPTracker,
+ PressureDiffs *PDiffs) {
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
+ : ST.useAA();
+ AliasAnalysis *AAForDep = UseAA ? AA : 0;
+
+ MISUnitMap.clear();
+ ScheduleDAG::clearDAG();
+
// Create an SUnit for each real instruction.
initSUnits();
+ if (PDiffs)
+ PDiffs->init(SUnits.size());
+
// We build scheduling units by walking a block's instruction list from bottom
// to top.
@@ -715,10 +735,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Uses.setUniverse(TRI->getNumRegs());
assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
- // FIXME: Allow SparseSet to reserve space for the creation of virtual
- // registers during scheduling. Don't artificially inflate the Universe
- // because we want to assert that vregs are not created during DAG building.
+ VRegUses.clear();
VRegDefs.setUniverse(MRI.getNumVirtRegs());
+ VRegUses.setUniverse(MRI.getNumVirtRegs());
// Model data dependencies between instructions being scheduled and the
// ExitSU.
@@ -738,17 +757,18 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
DbgMI = MI;
continue;
}
+ SUnit *SU = MISUnitMap[MI];
+ assert(SU && "No SUnit mapped to this MI");
+
if (RPTracker) {
- RPTracker->recede();
+ PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0;
+ RPTracker->recede(/*LiveUses=*/0, PDiff);
assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
}
assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
"Cannot schedule terminators or labels!");
- SUnit *SU = MISUnitMap[MI];
- assert(SU && "No SUnit mapped to this MI");
-
// Add register-based dependencies (data, anti, and output).
bool HasVRegDef = false;
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@@ -826,20 +846,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
ChainLatency = TrueMemOrderLatency;
- addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
ChainLatency);
}
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
E = AliasMemDefs.end(); I != E; ++I)
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
TrueMemOrderLatency);
}
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
@@ -872,7 +892,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
MapVector<const Value *, SUnit *>::iterator IE =
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
+ 0, true);
I->second = SU;
} else {
if (ThisMayAlias)
@@ -887,7 +908,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
}
@@ -896,11 +917,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
// But we also should check dependent instructions for the
// SU in question.
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
@@ -930,7 +951,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// potentially aliasing stores.
for (MapVector<const Value *, SUnit *>::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
@@ -952,7 +973,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
MapVector<const Value *, SUnit *>::iterator IE =
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
+ 0, true);
if (ThisMayAlias)
AliasMemUses[V].push_back(SU);
else
@@ -962,7 +984,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
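
A rough usage sketch for the extended DAG-building entry point; the
PressureDiffs container and the RegPressureTracker are assumed to be owned by
the caller (as the machine scheduler does), so this fragment is illustrative
rather than part of the change:

    // Build the DAG, recording one PressureDiff per SUnit while receding.
    PressureDiffs SUPressureDiffs;
    buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
    // The recorded diff for a node can later seed the fast pressure query.
    PressureDiff &PDiff = SUPressureDiffs[SU->NodeNum];
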
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cb88941..43f72c5 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -35,6 +35,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -43,6 +45,7 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+STATISTIC(SlicedLoads, "Number of loads sliced");
namespace {
static cl::opt<bool>
@@ -53,6 +56,14 @@ namespace {
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Include global information in alias analysis"));
+ /// Hidden option to stress test load slicing, i.e., when this option
+ /// is enabled, load slicing bypasses most of its profitability guards.
+ static cl::opt<bool>
+ StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
+ cl::desc("Bypass the profitability model of load "
+ "slicing"),
+ cl::init(false));
+
//------------------------------ DAGCombiner ---------------------------------//
class DAGCombiner {
@@ -62,6 +73,7 @@ namespace {
CodeGenOpt::Level OptLevel;
bool LegalOperations;
bool LegalTypes;
+ bool ForCodeSize;
// Worklist of all of the nodes that need to be simplified.
//
@@ -144,6 +156,7 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
+ bool SliceUpLoad(SDNode *N);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
@@ -283,11 +296,11 @@ namespace {
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
- bool isAlias(SDValue Ptr1, int64_t Size1,
+ bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
const MDNode *TBAAInfo1,
- SDValue Ptr2, int64_t Size2,
+ SDValue Ptr2, int64_t Size2, bool IsVolatile2,
const Value *SrcValue2, int SrcValueOffset2,
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const;
@@ -299,7 +312,7 @@ namespace {
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
+ SDValue &Ptr, int64_t &Size, bool &IsVolatile,
const Value *&SrcValue, int &SrcValueOffset,
unsigned &SrcValueAlignment,
const MDNode *&TBAAInfo) const;
@@ -315,8 +328,15 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
+ AttributeSet FnAttrs =
+ DAG.getMachineFunction().getFunction()->getAttributes();
+ ForCodeSize =
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ }
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
@@ -329,7 +349,8 @@ namespace {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) : TLI.getPointerTy();
+ return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
+ : TLI.getPointerTy();
}
/// isTypeLegal - This method returns true if we are running before type
@@ -744,9 +765,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = true;
return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(),
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ MemVT, LD->getMemOperand());
}
unsigned Opc = Op.getOpcode();
@@ -967,9 +986,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(),
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ MemVT, LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
DEBUG(dbgs() << "\nPromoting ";
@@ -1017,7 +1034,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// try and combine it.
while (!WorkListContents.empty()) {
SDNode *N;
- // The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
+ // The WorkListOrder holds the SDNodes in order, but it may contain
+ // duplicates.
// In order to avoid a linear scan, we use a set (O(log N)) to hold what the
    // worklist *should* contain, and check that the node we want to visit should
// actually be visited.
@@ -1617,19 +1635,8 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
return DAG.getConstant(0, VT);
- if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
- // Produce a vector of zeros.
- EVT ElemTy = VT.getVectorElementType();
- if (LegalTypes && TLI.getTypeAction(*DAG.getContext(), ElemTy) ==
- TargetLowering::TypePromoteInteger)
- ElemTy = TLI.getTypeToTransformTo(*DAG.getContext(), ElemTy);
- assert((!LegalTypes || TLI.isTypeLegal(ElemTy)) &&
- "Type for zero vector elements is not legal");
- SDValue El = DAG.getConstant(0, ElemTy);
- std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
- &Ops[0], Ops.size());
- }
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return DAG.getConstant(0, VT);
return SDValue();
}
@@ -1771,8 +1778,8 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
return SDValue();
}
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
-/// all the same constant or undefined.
+/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
+/// elements are all the same constant or undefined.
static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
if (!C)
@@ -1808,9 +1815,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
- ConstValue0 = N0IsConst? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() : APInt();
+ ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
+ : APInt();
N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
- ConstValue1 = N1IsConst? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() : APInt();
+ ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
+ : APInt();
}
// fold (mul c1, c2) -> c1*c2
@@ -1823,20 +1832,24 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
return N1;
+ // We require a splat of the entire scalar bit width for non-contiguous
+ // bit patterns.
+ bool IsFullSplat =
+ ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1 == 1)
+ if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT), N0);
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && ConstValue1.isPowerOf2())
+ if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(ConstValue1.logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && (-ConstValue1).isPowerOf2()) {
+ if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
@@ -2675,6 +2688,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
}
+ // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+ if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+ Op0 == Op1 && LL.getValueType().isInteger() &&
+ Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
+ cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
+ (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ cast<ConstantSDNode>(RR)->isNullValue()))) {
+ SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
+ LL, DAG.getConstant(1, LL.getValueType()));
+ AddToWorkList(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(N), VT, ADDNode,
+ DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
+ }
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
Op1 = ISD::getSetCCSwappedOperands(Op1);
@@ -2718,9 +2744,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ MemVT, LN0->getMemOperand());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2739,11 +2763,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
AddToWorkList(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2773,10 +2794,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(),
- ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getChain(), LN0->getBasePtr(), ExtVT,
+ LN0->getMemOperand());
AddToWorkList(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2812,7 +2831,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
LN0->getChain(), NewPtr,
LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- Alignment);
+ Alignment, LN0->getTBAAInfo());
AddToWorkList(N);
CombineTo(LN0, Load, Load.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -2848,6 +2867,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+ if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode())
+ return BSwap;
+ }
+
return SDValue();
}
@@ -2932,13 +2959,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (N00 != N10)
return SDValue();
- // Make sure everything beyond the low halfword is zero since the SRL 16
- // will clear the top bits.
+ // Make sure everything beyond the low halfword gets set to zero since the SRL
+ // 16 will clear the top bits.
unsigned OpSizeInBits = VT.getSizeInBits();
- if (DemandHighBits && OpSizeInBits > 16 &&
- (!LookPassAnd0 || !LookPassAnd1) &&
- !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
- return SDValue();
+ if (DemandHighBits && OpSizeInBits > 16) {
+ // If the left-shift isn't masked out then the only way this is a bswap is
+ // if all bits beyond the low 8 are 0. In that case the entire pattern
+ // reduces to a left shift anyway: leave it for other parts of the combiner.
+ if (!LookPassAnd0)
+ return SDValue();
+
+ // However, if the right shift isn't masked out then it might be because
+ // it's not needed. See if we can spot that too.
+ if (!LookPassAnd1 &&
+ !DAG.MaskedValueIsZero(
+ N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+ return SDValue();
+ }
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
if (OpSizeInBits > 16)
@@ -3078,7 +3115,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
SDValue(Parts[0],0));
- // Result of the bswap should be rotated by 16. If it's not legal, than
+ // Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
@@ -3343,29 +3380,9 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
if (LHSMask.getNode() || RHSMask.getNode())
return 0;
- // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
- // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
- if (RHSShiftAmt.getOpcode() == ISD::SUB &&
- LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
- if (ConstantSDNode *SUBC =
- dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
- }
- }
-
- // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
- // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
- if (LHSShiftAmt.getOpcode() == ISD::SUB &&
- RHSShiftAmt == LHSShiftAmt.getOperand(1))
- if (ConstantSDNode *SUBC =
- dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0)))
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
- HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-
- // Look for sign/zext/any-extended or truncate cases:
+ // If the shift amount is sign/zext/any-extended just peel it off.
+ SDValue LExtOp0 = LHSShiftAmt;
+ SDValue RExtOp0 = RHSShiftAmt;
if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
@@ -3374,33 +3391,31 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
- SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
- SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
- if (RExtOp0.getOpcode() == ISD::SUB &&
- RExtOp0.getOperand(1) == LExtOp0) {
- // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
- // (rotl x, y)
- // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
- // (rotr x, (sub 32, y))
- if (ConstantSDNode *SUBC =
+ LExtOp0 = LHSShiftAmt.getOperand(0);
+ RExtOp0 = RHSShiftAmt.getOperand(0);
+ }
+
+ if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) {
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y)
+ // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+ // (rotr x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
- LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
- } else if (LExtOp0.getOpcode() == ISD::SUB &&
- RExtOp0 == LExtOp0.getOperand(1)) {
- // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
- // (rotr x, y)
- // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
- // (rotl x, (sub 32, y))
- if (ConstantSDNode *SUBC =
+ if (SUBC->getAPIntValue() == OpSizeInBits)
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+ } else if (LExtOp0.getOpcode() == ISD::SUB &&
+ RExtOp0 == LExtOp0.getOperand(1)) {
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotr x, y)
+ // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+ // (rotl x, (sub 32, y))
+ if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
- if (SUBC->getAPIntValue() == OpSizeInBits)
- return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
- LHSShiftArg,
- HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
- }
+ if (SUBC->getAPIntValue() == OpSizeInBits)
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
}
return 0;
@@ -3620,6 +3635,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
// fold (shl c1, c2) -> c1<<c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
@@ -3697,6 +3718,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
+ // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
+ // Only fold this if the inner zext has no other uses to avoid increasing
+ // the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ if (c1 < VT.getSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 == c2) {
+ SDValue NewOp0 = N0.getOperand(0);
+ EVT CountVT = NewOp0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
+ NewOp0, DAG.getConstant(c2, CountVT));
+ AddToWorkList(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
+ }
+ }
+
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
@@ -3750,6 +3792,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
// fold (sra c1, c2) -> (sra c1, c2)
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
@@ -3895,6 +3943,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ // fold vector ops
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVBinOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
// fold (srl c1, c2) -> c1 >>u c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
@@ -4217,6 +4271,23 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
+static
+std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT LoVT, HiVT;
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Split the inputs.
+ SDValue Lo, Hi, LL, LH, RL, RH;
+ llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+
+ return std::make_pair(Lo, Hi);
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4254,6 +4325,34 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
+ // If the VSELECT result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and its operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+ if (N0.getOpcode() == ISD::SETCC) {
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
+ llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+ llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+ llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
+
+ // Add the new VSELECT nodes to the work list in case they need to be split
+ // again.
+ AddToWorkList(Lo.getNode());
+ AddToWorkList(Hi.getNode());
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+ }
+
return SDValue();
}
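
To make the new VSELECT splitting concrete, here is a hedged before/after
picture in the fold notation used elsewhere in this file, assuming a target
where the result type (say v8i32) is split into two v4i32 halves:

    // before the combine:
    //   (vselect (setcc a, b, cc), x, y)                    ; all v8i32
    // after the combine, ahead of type legalization:
    //   (concat_vectors
    //      (vselect (setcc a.lo, b.lo, cc), x.lo, y.lo),    ; v4i32 halves
    //      (vselect (setcc a.hi, b.hi, cc), x.hi, y.hi))
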
@@ -4469,10 +4568,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- N0.getValueType(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -4493,10 +4590,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), MemVT,
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
@@ -4524,11 +4619,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(),
LN0->getMemoryVT(),
- LN0->isVolatile(),
- LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
@@ -4593,9 +4685,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
return DAG.getSelect(SDLoc(N), VT,
DAG.getSetCC(SDLoc(N),
- getSetCCResultType(VT),
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
}
}
@@ -4762,10 +4854,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- N0.getValueType(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -4795,11 +4885,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(),
LN0->getMemoryVT(),
- LN0->isVolatile(),
- LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
@@ -4826,10 +4913,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), MemVT,
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
@@ -4992,10 +5077,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- N0.getValueType(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -5016,9 +5099,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N),
VT, LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), MemVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
@@ -5250,12 +5331,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), NewAlign);
+ LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
else
Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- NewAlign);
+ NewAlign, LN0->getTBAAInfo());
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
@@ -5353,10 +5434,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- EVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), EVT,
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
AddToWorkList(ExtLoad.getNode());
@@ -5371,10 +5450,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- EVT,
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), EVT,
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -5657,7 +5734,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
@@ -5667,7 +5745,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), OrigAlign);
+ LN0->isInvariant(), OrigAlign,
+ LN0->getTBAAInfo());
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BITCAST, SDLoc(N0),
@@ -6652,16 +6731,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
}
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
- if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- N0.getValueType(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->getAlignment());
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
@@ -7451,13 +7528,16 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), Align);
+ LD->isVolatile(), LD->isNonTemporal(), Align,
+ LD->getTBAAInfo());
return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
- if (CombinerAA) {
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -7468,17 +7548,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Replace the chain to void dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
- BetterChain, Ptr, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ BetterChain, Ptr, LD->getMemOperand());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
LD->getValueType(0),
- BetterChain, Ptr, LD->getPointerInfo(),
- LD->getMemoryVT(),
- LD->isVolatile(),
- LD->isNonTemporal(),
- LD->getAlignment());
+ BetterChain, Ptr, LD->getMemoryVT(),
+ LD->getMemOperand());
}
// Create token factor to keep old chain connected.
@@ -7498,9 +7573,562 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
+ // Try to slice up N to more direct loads if the slices are mapped to
+ // different register banks or pairing can take place.
+ if (SliceUpLoad(N))
+ return SDValue(N, 0);
+
return SDValue();
}
+namespace {
+/// \brief Helper structure used to slice a load into smaller loads.
+/// Basically a slice is obtained from the following sequence:
+/// Origin = load Ty1, Base
+/// Shift = srl Ty1 Origin, CstTy Amount
+/// Inst = trunc Shift to Ty2
+///
+/// Then, it will be rewritten into:
+/// Slice = load SliceTy, Base + SliceOffset
+/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
+///
+/// SliceTy is deduced from the number of bits that are actually used to
+/// build Inst.
+struct LoadedSlice {
+ /// \brief Helper structure used to compute the cost of a slice.
+ struct Cost {
+ /// Are we optimizing for code size.
+ bool ForCodeSize;
+ /// Various costs.
+ unsigned Loads;
+ unsigned Truncates;
+ unsigned CrossRegisterBanksCopies;
+ unsigned ZExts;
+ unsigned Shift;
+
+ Cost(bool ForCodeSize = false)
+ : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
+ CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
+
+ /// \brief Get the cost of one isolated slice.
+ Cost(const LoadedSlice &LS, bool ForCodeSize = false)
+ : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
+ CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
+ EVT TruncType = LS.Inst->getValueType(0);
+ EVT LoadedType = LS.getLoadedType();
+ if (TruncType != LoadedType &&
+ !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
+ ZExts = 1;
+ }
+
+ /// \brief Account for slicing gain in the current cost.
+ /// Slicing provides a few gains, such as removing a shift or a
+ /// truncate. This method allows growing the cost of the original
+ /// load with the gain from this slice.
+ void addSliceGain(const LoadedSlice &LS) {
+ // Each slice saves a truncate.
+ const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
+ if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
+ LS.Inst->getOperand(0).getValueType()))
+ ++Truncates;
+ // If there is a shift amount, this slice gets rid of it.
+ if (LS.Shift)
+ ++Shift;
+ // If this slice can merge a cross register bank copy, account for it.
+ if (LS.canMergeExpensiveCrossRegisterBankCopy())
+ ++CrossRegisterBanksCopies;
+ }
+
+ Cost &operator+=(const Cost &RHS) {
+ Loads += RHS.Loads;
+ Truncates += RHS.Truncates;
+ CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
+ ZExts += RHS.ZExts;
+ Shift += RHS.Shift;
+ return *this;
+ }
+
+ bool operator==(const Cost &RHS) const {
+ return Loads == RHS.Loads && Truncates == RHS.Truncates &&
+ CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
+ ZExts == RHS.ZExts && Shift == RHS.Shift;
+ }
+
+ bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
+
+ bool operator<(const Cost &RHS) const {
+ // Assume cross register banks copies are as expensive as loads.
+ // FIXME: Do we want some more target hooks?
+ unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
+ unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
+ // Unless we are optimizing for code size, consider the
+ // expensive operation first.
+ if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
+ return ExpensiveOpsLHS < ExpensiveOpsRHS;
+ return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
+ (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
+ }
+
+ bool operator>(const Cost &RHS) const { return RHS < *this; }
+
+ bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
+
+ bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
+ };
+ // The last instruction that represents the slice. This should be a
+ // truncate instruction.
+ SDNode *Inst;
+ // The original load instruction.
+ LoadSDNode *Origin;
+ // The right shift amount in bits from the original load.
+ unsigned Shift;
+ // The DAG from which Origin came.
+ // This is used to get some contextual information about legal types, etc.
+ SelectionDAG *DAG;
+
+ LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
+ unsigned Shift = 0, SelectionDAG *DAG = NULL)
+ : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
+
+ LoadedSlice(const LoadedSlice &LS)
+ : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
+
+ /// \brief Get the bits used in a chunk of bits \p BitWidth large.
+ /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
+ /// unused bits set to 0.
+ APInt getUsedBits() const {
+ // Reproduce the trunc(lshr) sequence:
+ // - Start from the truncated value.
+ // - Zero extend to the desired bit width.
+ // - Shift left.
+ assert(Origin && "No original load to compare against.");
+ unsigned BitWidth = Origin->getValueSizeInBits(0);
+ assert(Inst && "This slice is not bound to an instruction");
+ assert(Inst->getValueSizeInBits(0) <= BitWidth &&
+ "Extracted slice is bigger than the whole type!");
+ APInt UsedBits(Inst->getValueSizeInBits(0), 0);
+ UsedBits.setAllBits();
+ UsedBits = UsedBits.zext(BitWidth);
+ UsedBits <<= Shift;
+ return UsedBits;
+ }
+
+ /// \brief Get the size of the slice to be loaded in bytes.
+ unsigned getLoadedSize() const {
+ unsigned SliceSize = getUsedBits().countPopulation();
+ assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
+ return SliceSize / 8;
+ }
+
+ /// \brief Get the type that will be loaded for this slice.
+ /// Note: This may not be the final type for the slice.
+ EVT getLoadedType() const {
+ assert(DAG && "Missing context");
+ LLVMContext &Ctxt = *DAG->getContext();
+ return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
+ }
+
+ /// \brief Get the alignment of the load used for this slice.
+ unsigned getAlignment() const {
+ unsigned Alignment = Origin->getAlignment();
+ unsigned Offset = getOffsetFromBase();
+ if (Offset != 0)
+ Alignment = MinAlign(Alignment, Alignment + Offset);
+ return Alignment;
+ }
+
+ /// \brief Check if this slice can be rewritten with legal operations.
+ bool isLegal() const {
+ // An invalid slice is not legal.
+ if (!Origin || !Inst || !DAG)
+ return false;
+
+ // Offsets are for indexed load only, we do not handle that.
+ if (Origin->getOffset().getOpcode() != ISD::UNDEF)
+ return false;
+
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+
+ // Check that the type is legal.
+ EVT SliceType = getLoadedType();
+ if (!TLI.isTypeLegal(SliceType))
+ return false;
+
+ // Check that the load is legal for this type.
+ if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
+ return false;
+
+ // Check that the offset can be computed.
+ // 1. Check its type.
+ EVT PtrType = Origin->getBasePtr().getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ // 2. Check that it fits in the immediate.
+ if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
+ return false;
+
+ // 3. Check that the computation is legal.
+ if (!TLI.isOperationLegal(ISD::ADD, PtrType))
+ return false;
+
+ // Check that the zext is legal if it needs one.
+ EVT TruncateType = Inst->getValueType(0);
+ if (TruncateType != SliceType &&
+ !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
+ return false;
+
+ return true;
+ }
+
+ /// \brief Get the offset in bytes of this slice in the original chunk of
+ /// bits.
+ /// \pre DAG != NULL.
+ uint64_t getOffsetFromBase() const {
+ assert(DAG && "Missing context.");
+ bool IsBigEndian =
+ DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
+ assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
+ uint64_t Offset = Shift / 8;
+ unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
+ assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
+ "The size of the original loaded type is not a multiple of a"
+ " byte.");
+ // If Offset is bigger than TySizeInBytes, it means we are loading all
+ // zeros. This should have been optimized before in the process.
+ assert(TySizeInBytes > Offset &&
+ "Invalid shift amount for given loaded size");
+ if (IsBigEndian)
+ Offset = TySizeInBytes - Offset - getLoadedSize();
+ return Offset;
+ }
+
+ /// \brief Generate the sequence of instructions to load the slice
+ /// represented by this object and redirect the uses of this slice to
+ /// this new sequence of instructions.
+ /// \pre this->Inst && this->Origin are valid Instructions and this
+ /// object passed the legal check: LoadedSlice::isLegal returned true.
+ /// \return The last instruction of the sequence used to load the slice.
+ SDValue loadSlice() const {
+ assert(Inst && Origin && "Unable to replace a non-existing slice.");
+ const SDValue &OldBaseAddr = Origin->getBasePtr();
+ SDValue BaseAddr = OldBaseAddr;
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
+ int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
+ assert(Offset >= 0 && "Offset too big to fit in int64_t!");
+ if (Offset) {
+ // BaseAddr = BaseAddr + Offset.
+ EVT ArithType = BaseAddr.getValueType();
+ BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
+ DAG->getConstant(Offset, ArithType));
+ }
+
+ // Create the type of the loaded slice according to its size.
+ EVT SliceType = getLoadedType();
+
+ // Create the load for the slice.
+ SDValue LastInst = DAG->getLoad(
+ SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
+ Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
+ Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
+ // If the final type is not the same as the loaded type, this means that
+ // we have to pad with zero. Create a zero extend for that.
+ EVT FinalType = Inst->getValueType(0);
+ if (SliceType != FinalType)
+ LastInst =
+ DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
+ return LastInst;
+ }
+
+ /// \brief Check if this slice can be merged with an expensive cross register
+ /// bank copy. E.g.,
+ /// i = load i32
+ /// f = bitcast i32 i to float
+ bool canMergeExpensiveCrossRegisterBankCopy() const {
+ if (!Inst || !Inst->hasOneUse())
+ return false;
+ SDNode *Use = *Inst->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST)
+ return false;
+ assert(DAG && "Missing context");
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ EVT ResVT = Use->getValueType(0);
+ const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+ const TargetRegisterClass *ArgRC =
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+ if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // At this point, we know that we perform a cross-register-bank copy.
+ // Check if it is expensive.
+ const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
+    // Assume bitcasts are cheap unless the two register classes do not
+    // explicitly share a common subclass.
+ if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
+ return false;
+
+ // Check if it will be merged with the load.
+ // 1. Check the alignment constraint.
+ unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
+ ResVT.getTypeForEVT(*DAG->getContext()));
+
+ if (RequiredAlignment > getAlignment())
+ return false;
+
+ // 2. Check that the load is a legal operation for that type.
+ if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // 3. Check that we do not have a zext in the way.
+ if (Inst->getValueType(0) != getLoadedType())
+ return false;
+
+ return true;
+ }
+};
+}
+
+/// \brief Sorts LoadedSlice according to their offset.
+struct LoadedSliceSorter {
+ bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+ return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+ }
+};
+
+/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
+/// \p UsedBits looks like 0..0 1..1 0..0.
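+/// For example, 0x0000ff00 is dense (bits 8..15 are set), whereas 0x00ff00ff
+/// is not because of the hole between the two groups of set bits.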
+static bool areUsedBitsDense(const APInt &UsedBits) {
+ // If all the bits are one, this is dense!
+ if (UsedBits.isAllOnesValue())
+ return true;
+
+ // Get rid of the unused bits on the right.
+ APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
+ // Get rid of the unused bits on the left.
+ if (NarrowedUsedBits.countLeadingZeros())
+ NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
+ // Check that the chunk of bits is completely used.
+ return NarrowedUsedBits.isAllOnesValue();
+}
+
+/// \brief Check whether or not \p First and \p Second are next to each other
+/// in memory. This means that there is no hole between the bits loaded
+/// by \p First and the bits loaded by \p Second.
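+/// For example, an 8-bit slice covering bits [0, 8) and a 16-bit slice
+/// covering bits [8, 24) are next to each other: together they form a single
+/// dense region of used bits.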
+static bool areSlicesNextToEachOther(const LoadedSlice &First,
+ const LoadedSlice &Second) {
+ assert(First.Origin == Second.Origin && First.Origin &&
+ "Unable to match different memory origins.");
+ APInt UsedBits = First.getUsedBits();
+ assert((UsedBits & Second.getUsedBits()) == 0 &&
+ "Slices are not supposed to overlap.");
+ UsedBits |= Second.getUsedBits();
+ return areUsedBitsDense(UsedBits);
+}
+
+/// \brief Adjust the \p GlobalLSCost according to the target
+/// pairing capabilities and the layout of the slices.
+/// \pre \p GlobalLSCost should account for at least as many loads as
+/// there are slices in \p LoadedSlices.
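+/// For example, when the target reports paired-load support for the slice
+/// type via hasPairedLoad(), two adjacent slices of that type are accounted
+/// for as a single load in \p GlobalLSCost.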
+static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ LoadedSlice::Cost &GlobalLSCost) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+  // If there are fewer than two elements, no pairing is possible.
+ if (NumberOfSlices < 2)
+ return;
+
+ // Sort the slices so that elements that are likely to be next to each
+ // other in memory are next to each other in the list.
+ std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
+ const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
+  // First (resp. Second) is the first (resp. second) potential candidate
+  // to be placed in a paired load.
+ const LoadedSlice *First = NULL;
+ const LoadedSlice *Second = NULL;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
+ // Set the beginning of the pair.
+ First = Second) {
+
+ Second = &LoadedSlices[CurrSlice];
+
+ // If First is NULL, it means we start a new pair.
+ // Get to the next slice.
+ if (!First)
+ continue;
+
+ EVT LoadedType = First->getLoadedType();
+
+ // If the types of the slices are different, we cannot pair them.
+ if (LoadedType != Second->getLoadedType())
+ continue;
+
+ // Check if the target supplies paired loads for this type.
+ unsigned RequiredAlignment = 0;
+ if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
+      // Move to the next pair; this type is hopeless.
+ Second = NULL;
+ continue;
+ }
+ // Check if we meet the alignment requirement.
+ if (RequiredAlignment > First->getAlignment())
+ continue;
+
+ // Check that both loads are next to each other in memory.
+ if (!areSlicesNextToEachOther(*First, *Second))
+ continue;
+
+ assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
+ --GlobalLSCost.Loads;
+ // Move to the next pair.
+ Second = NULL;
+ }
+}
+
+/// \brief Check the profitability of all involved LoadedSlices.
+/// Currently, slicing is considered profitable if there are exactly two
+/// involved slices (1) which are (2) next to each other in memory, and
+/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
+///
+/// Note: The order of the elements in \p LoadedSlices may be modified, but not
+/// the elements themselves.
+///
+/// FIXME: When the cost model will be mature enough, we can relax
+/// constraints (1) and (2).
+static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ const APInt &UsedBits, bool ForCodeSize) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ if (StressLoadSlicing)
+ return NumberOfSlices > 1;
+
+ // Check (1).
+ if (NumberOfSlices != 2)
+ return false;
+
+ // Check (2).
+ if (!areUsedBitsDense(UsedBits))
+ return false;
+
+ // Check (3).
+ LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
+ // The original code has one big load.
+ OrigCost.Loads = 1;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
+ const LoadedSlice &LS = LoadedSlices[CurrSlice];
+ // Accumulate the cost of all the slices.
+ LoadedSlice::Cost SliceCost(LS, ForCodeSize);
+ GlobalSlicingCost += SliceCost;
+
+ // Account as cost in the original configuration the gain obtained
+ // with the current slices.
+ OrigCost.addSliceGain(LS);
+ }
+
+ // If the target supports paired load, adjust the cost accordingly.
+ adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
+ return OrigCost > GlobalSlicingCost;
+}
+
+/// \brief If the given load, \p N, is used only by trunc or trunc(lshr)
+/// operations, split it into the various pieces being extracted.
+///
+/// This sort of thing is introduced by SROA.
+/// This slicing takes care not to insert overlapping loads.
+/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
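+///
+/// Illustrative (hypothetical) pattern on a little-endian target:
+///   b  = load i32 %ptr
+///   t0 = trunc i32 b to i16                  ; slice at byte offset 0
+///   t1 = trunc i32 (lshr i32 b, 16) to i16   ; slice at byte offset 2
+/// becomes two independent i16 loads from %ptr and %ptr + 2, provided both
+/// i16 loads are legal and the slicing is deemed profitable.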
+bool DAGCombiner::SliceUpLoad(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
+ !LD->getValueType(0).isInteger())
+ return false;
+
+ // Keep track of already used bits to detect overlapping values.
+ // In that case, we will just abort the transformation.
+ APInt UsedBits(LD->getValueSizeInBits(0), 0);
+
+ SmallVector<LoadedSlice, 4> LoadedSlices;
+
+ // Check if this load is used as several smaller chunks of bits.
+ // Basically, look for uses in trunc or trunc(lshr) and record a new chain
+ // of computation for each trunc.
+ for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+ // Skip the uses of the chain.
+ if (UI.getUse().getResNo() != 0)
+ continue;
+
+ SDNode *User = *UI;
+ unsigned Shift = 0;
+
+ // Check if this is a trunc(lshr).
+ if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
+ isa<ConstantSDNode>(User->getOperand(1))) {
+ Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
+ User = *User->use_begin();
+ }
+
+    // At this point, User is a truncate iff we encountered trunc or
+    // trunc(lshr).
+ if (User->getOpcode() != ISD::TRUNCATE)
+ return false;
+
+    // The width of the type must be a power of 2 and at least 8 bits.
+    // Otherwise the load cannot be represented in LLVM IR.
+    // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
+    // the slice would not start on a byte boundary. We do not support that.
+    unsigned Width = User->getValueSizeInBits(0);
+    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
+      return false;
+
+ // Build the slice for this chain of computations.
+ LoadedSlice LS(User, LD, Shift, &DAG);
+ APInt CurrentUsedBits = LS.getUsedBits();
+
+ // Check if this slice overlaps with another.
+ if ((CurrentUsedBits & UsedBits) != 0)
+ return false;
+ // Update the bits used globally.
+ UsedBits |= CurrentUsedBits;
+
+ // Check if the new slice would be legal.
+ if (!LS.isLegal())
+ return false;
+
+ // Record the slice.
+ LoadedSlices.push_back(LS);
+ }
+
+ // Abort slicing if it does not seem to be profitable.
+ if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
+ return false;
+
+ ++SlicedLoads;
+
+ // Rewrite each chain to use an independent load.
+ // By construction, each chain can be represented by a unique load.
+
+ // Prepare the argument for the new token factor for all the slices.
+ SmallVector<SDValue, 8> ArgChains;
+ for (SmallVectorImpl<LoadedSlice>::const_iterator
+ LSIt = LoadedSlices.begin(),
+ LSItEnd = LoadedSlices.end();
+ LSIt != LSItEnd; ++LSIt) {
+ SDValue SliceInst = LSIt->loadSlice();
+ CombineTo(LSIt->Inst, SliceInst, true);
+ if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
+ SliceInst = SliceInst.getOperand(0);
+ assert(SliceInst->getOpcode() == ISD::LOAD &&
+ "It takes more than a zext to get to the loaded slice!!");
+ ArgChains.push_back(SliceInst.getValue(1));
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ return true;
+}
+
/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out. If so, return the byte size
/// being masked out and the shift amount.
@@ -7735,7 +8363,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), NewAlign);
+ LD->isInvariant(), NewAlign,
+ LD->getTBAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
@@ -7846,17 +8475,28 @@ struct BaseIndexOffset {
static BaseIndexOffset match(SDValue Ptr) {
bool IsIndexSignExt = false;
- // Just Base or possibly anything else.
+    // We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
+    // node, then it could be just the BASE or anything else we do not know
+    // how to handle. Just use Ptr as BASE and give up.
if (Ptr->getOpcode() != ISD::ADD)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
- // Base + offset.
+ // We know that we have at least an ADD instruction. Try to pattern match
+ // the simple case of BASE + OFFSET.
if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
IsIndexSignExt);
}
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Ptr is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
// Look at Base + Index + Offset cases.
SDValue Base = Ptr->getOperand(0);
SDValue IndexOffset = Ptr->getOperand(1);
@@ -8007,6 +8647,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
Index = STn;
break;
} else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ if (Ldn->isVolatile()) {
+ Index = NULL;
+ break;
+ }
+
// Save the load node for later. Continue the scan.
AliasLoadNodes.push_back(Ldn);
NextInChain = Ldn->getChain().getNode();
@@ -8384,7 +9029,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getPointerInfo(), ST->isVolatile(),
- ST->isNonTemporal(), OrigAlign);
+ ST->isNonTemporal(), OrigAlign,
+ ST->getTBAAInfo());
}
// Turn 'store undef, Ptr' -> nothing.
@@ -8399,7 +9045,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// transform should not be done in this case.
if (Value.getOpcode() != ISD::TargetConstantFP) {
SDValue Tmp;
- switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
case MVT::f80:
@@ -8412,8 +9058,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), MVT::i32);
return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getPointerInfo(), ST->isVolatile(),
- ST->isNonTemporal(), ST->getAlignment());
+ Ptr, ST->getMemOperand());
}
break;
case MVT::f64:
@@ -8423,8 +9068,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), MVT::i64);
return DAG.getStore(Chain, SDLoc(N), Tmp,
- Ptr, ST->getPointerInfo(), ST->isVolatile(),
- ST->isNonTemporal(), ST->getAlignment());
+ Ptr, ST->getMemOperand());
}
if (!ST->isVolatile() &&
@@ -8440,18 +9084,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
- ST->getAlignment());
+ ST->getAlignment(), TBAAInfo);
Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
- Alignment);
+ Alignment, TBAAInfo);
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
St0, St1);
}
@@ -8467,7 +9112,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
- ST->isVolatile(), ST->isNonTemporal(), Align);
+ ST->isVolatile(), ST->isNonTemporal(), Align,
+ ST->getTBAAInfo());
}
}
@@ -8477,7 +9123,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (NewST.getNode())
return NewST;
- if (CombinerAA) {
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -8488,14 +9136,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getPointerInfo(),
- ST->getMemoryVT(), ST->isVolatile(),
- ST->isNonTemporal(), ST->getAlignment());
+ ST->getMemoryVT(), ST->getMemOperand());
} else {
ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
- ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(),
- ST->getAlignment());
+ ST->getMemOperand());
}
// Create token to keep both nodes around.
@@ -8528,9 +9172,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
- Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
- ST->isVolatile(), ST->isNonTemporal(),
- ST->getAlignment());
+ Ptr, ST->getMemoryVT(), ST->getMemOperand());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
@@ -8561,9 +9203,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
ST->getMemoryVT())) {
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
- Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
- ST->isVolatile(), ST->isNonTemporal(),
- ST->getAlignment());
+ Ptr, ST->getMemoryVT(), ST->getMemOperand());
}
// Only perform this optimization before the types are legal, because we
@@ -8821,13 +9461,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
? ISD::ZEXTLOAD : ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
- LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ Align, LN0->getTBAAInfo());
Chain = Load.getValue(1);
} else {
Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Align);
+ LN0->isInvariant(), Align, LN0->getTBAAInfo());
Chain = Load.getValue(1);
if (NVT.bitsLT(LVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
@@ -9165,8 +9806,35 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return N->getOperand(0);
// Check if all of the operands are undefs.
+ EVT VT = N->getValueType(0);
if (ISD::allOperandsUndef(N))
- return DAG.getUNDEF(N->getValueType(0));
+ return DAG.getUNDEF(VT);
+
+ // Optimize concat_vectors where one of the vectors is undef.
+ if (N->getNumOperands() == 2 &&
+ N->getOperand(1)->getOpcode() == ISD::UNDEF) {
+ SDValue In = N->getOperand(0);
+ assert(In.getValueType().isVector() && "Must concat vectors");
+
+ // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
+ if (In->getOpcode() == ISD::BITCAST &&
+ !In->getOperand(0)->getValueType(0).isVector()) {
+ SDValue Scalar = In->getOperand(0);
+ EVT SclTy = Scalar->getValueType(0);
+
+ if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
+ VT.getSizeInBits() / SclTy.getSizeInBits());
+ if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
+ return SDValue();
+
+ SDLoc dl = SDLoc(N);
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ }
+ }
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
@@ -9225,7 +9893,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// (extract_subvec (concat V1, V2, ...), i)
// Into:
// Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same
+ // type.
if (V->getOperand(0).getValueType() != NVT)
return SDValue();
unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -9358,10 +10027,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= 0) {
- if (Idx < (int)NumElts)
- Idx += NumElts;
- else
+ if (Idx >= (int)NumElts)
Idx -= NumElts;
+ else
+ Idx = -1; // remove reference to lhs
}
NewMask.push_back(Idx);
}
@@ -9738,7 +10407,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
Load = DAG.getLoad(TheSelect->getValueType(0),
SDLoc(TheSelect),
- // FIXME: Discards pointer info.
+ // FIXME: Discards pointer and TBAA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
LLD->isInvariant(), LLD->getAlignment());
@@ -9747,7 +10416,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
RLD->getExtensionType() : LLD->getExtensionType(),
SDLoc(TheSelect),
TheSelect->getValueType(0),
- // FIXME: Discards pointer info.
+ // FIXME: Discards pointer and TBAA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->getMemoryVT(), LLD->isVolatile(),
LLD->isNonTemporal(), LLD->getAlignment());
@@ -9852,7 +10521,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
Cond, One, Zero);
AddToWorkList(CstOffset.getNode());
- CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
@@ -9974,9 +10643,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
return Temp;
// shl setcc result by log2 n2c
- return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
- getShiftAmountTy(Temp.getValueType())));
+ return DAG.getNode(
+ ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy(Temp.getValueType())));
}
}
@@ -10132,17 +10802,20 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
-bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
const MDNode *TBAAInfo1,
- SDValue Ptr2, int64_t Size2,
+ SDValue Ptr2, int64_t Size2, bool IsVolatile2,
const Value *SrcValue2, int SrcValueOffset2,
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
+ // If they are both volatile then they cannot be reordered.
+ if (IsVolatile1 && IsVolatile2) return true;
+
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
@@ -10187,7 +10860,9 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
return false;
}
- if (CombinerGlobalAA) {
+ bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
+ TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ if (UseAA && SrcValue1 && SrcValue2) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
@@ -10206,24 +10881,25 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
SDValue Ptr0, Ptr1;
int64_t Size0, Size1;
+ bool IsVolatile0, IsVolatile1;
const Value *SrcValue0, *SrcValue1;
int SrcValueOffset0, SrcValueOffset1;
unsigned SrcValueAlign0, SrcValueAlign1;
const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
- FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
SrcValueAlign0, SrcTBAAInfo0);
- FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
SrcValueAlign1, SrcTBAAInfo1);
- return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
SrcValueAlign0, SrcTBAAInfo0,
- Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
SrcValueAlign1, SrcTBAAInfo1);
}
/// FindAliasInfo - Extracts the relevant alias information from the memory
-/// node. Returns true if the operand was a load.
+/// node. Returns true if the operand was a nonvolatile load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
+ SDValue &Ptr, int64_t &Size, bool &IsVolatile,
const Value *&SrcValue,
int &SrcValueOffset,
unsigned &SrcValueAlign,
@@ -10232,11 +10908,12 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
Ptr = LS->getBasePtr();
Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ IsVolatile = LS->isVolatile();
SrcValue = LS->getSrcValue();
SrcValueOffset = LS->getSrcValueOffset();
SrcValueAlign = LS->getOriginalAlignment();
TBAAInfo = LS->getTBAAInfo();
- return isa<LoadSDNode>(LS);
+ return isa<LoadSDNode>(LS) && !IsVolatile;
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
@@ -10249,12 +10926,13 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// Get alias information for node.
SDValue Ptr;
int64_t Size;
+ bool IsVolatile;
const Value *SrcValue;
int SrcValueOffset;
unsigned SrcValueAlign;
const MDNode *SrcTBAAInfo;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
- SrcValueAlign, SrcTBAAInfo);
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
+ SrcValueOffset, SrcValueAlign, SrcTBAAInfo);
// Starting off.
Chains.push_back(OriginalChain);
@@ -10295,20 +10973,21 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// Get alias information for Chain.
SDValue OpPtr;
int64_t OpSize;
+ bool OpIsVolatile;
const Value *OpSrcValue;
int OpSrcValueOffset;
unsigned OpSrcValueAlign;
const MDNode *OpSrcTBAAInfo;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
- OpSrcValue, OpSrcValueOffset,
+ OpIsVolatile, OpSrcValue, OpSrcValueOffset,
OpSrcValueAlign,
OpSrcTBAAInfo);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
- isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
- SrcTBAAInfo,
- OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+ isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo,
+ OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
OpSrcValueAlign, OpSrcTBAAInfo)) {
Aliases.push_back(Chain);
} else {
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b4ac948f..a6f7461 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -638,29 +638,25 @@ bool FastISel::SelectCall(const User *I) {
(!isa<AllocaInst>(Address) ||
!FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address),
- false);
+ false);
- if (Op)
+ if (Op) {
if (Op->isReg()) {
- // Set the indirect flag if the type and the DIVariable's
- // indirect field are in disagreement: Indirectly-addressed
- // variables that are nonpointer types should be marked as
- // indirect, and VLAs should be marked as indirect eventhough
- // they are a pointer type.
- bool IsIndirect = DI->getAddress()->getType()->isPointerTy()
- ^ DIVar.isIndirect();
Op->setIsDebug(true);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect, Op->getReg(), Offset, DI->getVariable());
+ TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
+ DI->getVariable());
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(TargetOpcode::DBG_VALUE)).addOperand(*Op).addImm(0)
- .addMetadata(DI->getVariable());
- else
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(*Op)
+ .addImm(0)
+ .addMetadata(DI->getVariable());
+ } else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ }
return true;
}
case Intrinsic::dbg_value: {
@@ -688,6 +684,7 @@ bool FastISel::SelectCall(const User *I) {
.addFPImm(CF).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
} else if (unsigned Reg = lookUpRegForValue(V)) {
+ // FIXME: This does not handle register-indirect values at offset 0.
bool IsIndirect = DI->getOffset() != 0;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect,
Reg, DI->getOffset(), DI->getVariable());
@@ -1574,4 +1571,19 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
}
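+/// canFoldAddIntoGEP - Check whether \p Add, an add of a constant, can be
+/// folded into the addressing computation of \p GEP: the two types must have
+/// the same size and, if \p Add is an instruction, it must live in the
+/// current basic block.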
+bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
+ // Must be an add.
+ if (!isa<AddOperator>(Add))
+ return false;
+ // Type size needs to match.
+ if (TD.getTypeSizeInBits(GEP->getType()) !=
+ TD.getTypeSizeInBits(Add->getType()))
+ return false;
+ // Must be in the same basic block.
+ if (isa<Instruction>(Add) &&
+ FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB)
+ return false;
+ // Must have a constant operand.
+ return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
+}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index e107276..3a8fb85 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -211,6 +212,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
"IMPLICIT_DEF should have been handled as a special case elsewhere!");
+ unsigned NumResults = CountResults(Node);
for (unsigned i = 0; i < II.getNumDefs(); ++i) {
// If the specific node value is only used by a CopyToReg and the dest reg
// is a vreg in the same register class, use the CopyToReg'd destination
@@ -218,6 +220,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
unsigned VRBase = 0;
const TargetRegisterClass *RC =
TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
+ // If the register class is unknown for the given definition, then try to
+ // infer one from the value type.
+ if (!RC && i < NumResults)
+ RC = TLI->getRegClassFor(Node->getSimpleValueType(i));
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
unsigned NumResults = CountResults(Node);
@@ -722,10 +728,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
+ unsigned NumDefs = II.getNumDefs();
+ const uint16_t *ScratchRegs = NULL;
+
+ // Handle PATCHPOINT specially and then use the generic code.
+ if (Opc == TargetOpcode::PATCHPOINT) {
+ unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
+ NumDefs = NumResults;
+ ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
+ }
+
unsigned NumImpUses = 0;
unsigned NodeOperands =
- countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses);
- bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
+ countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
+ bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
if (II.isVariadic())
@@ -748,14 +764,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
- bool HasOptPRefs = II.getNumDefs() > NumResults;
+ bool HasOptPRefs = NumDefs > NumResults;
assert((!HasOptPRefs || !HasPhysRegOuts) &&
"Unable to cope with optional defs and phys regs defs!");
- unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0;
for (unsigned i = NumSkip; i != NodeOperands; ++i)
- AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+ // Add scratch registers as implicit def and early clobber
+ if (ScratchRegs)
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
+ RegState::EarlyClobber);
+
// Transfer all of the memory reference descriptions of this instruction.
MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
cast<MachineSDNode>(Node)->memoperands_end());
@@ -784,8 +806,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
- for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
- unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ for (unsigned i = NumDefs; i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - NumDefs];
if (!Node->hasAnyUseOfValue(i))
continue;
// This implicitly defined physreg has a use.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bd844e5..9061ae9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -95,8 +95,8 @@ private:
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
- void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- SDLoc dl);
+ bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ bool &NeedInvert, SDLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
@@ -311,6 +311,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
+ unsigned AS = ST->getAddressSpace();
+
SDLoc dl(ST);
if (ST->getMemoryVT().isFloatingPoint() ||
ST->getMemoryVT().isVector()) {
@@ -343,7 +345,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Store = DAG.getTruncStore(Chain, dl,
Val, StackPtr, MachinePointerInfo(),
StoredVT, false, false, 0);
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS));
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
@@ -381,7 +383,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
.getWithOffset(Offset),
MemVT, ST->isVolatile(),
ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getTBAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
@@ -408,13 +411,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
ST->getPointerInfo(), NewStoredVT,
ST->isVolatile(), ST->isNonTemporal(), Alignment);
+
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ DAG.getConstant(IncrementSize, TLI.getPointerTy(AS)));
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
- Alignment);
+ Alignment, ST->getTBAAInfo());
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -438,10 +442,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
- SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
- LD->isVolatile(),
- LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
+ LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (LoadedVT != VT)
Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
@@ -474,7 +476,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(),
- MinAlign(LD->getAlignment(), Offset));
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getTBAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
MachinePointerInfo(), false, false, 0));
@@ -492,7 +495,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->getPointerInfo().getWithOffset(Offset),
MemVT, LD->isVolatile(),
LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getTBAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
@@ -536,23 +540,25 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isLittleEndian()) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), Alignment);
+ LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
+ LD->getTBAAInfo());
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), Alignment);
+ LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
+ LD->getTBAAInfo());
}
// aggregate the two parts
@@ -655,6 +661,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
SDLoc dl(ST);
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
@@ -663,7 +670,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
}
if (CFP->getValueType(0) == MVT::f64) {
@@ -672,7 +679,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
}
if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
@@ -685,12 +692,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (TLI.isBigEndian()) std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ isNonTemporal, Alignment, TBAAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(4));
+ DAG.getConstant(4, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
- isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
+ isVolatile, isNonTemporal, MinAlign(Alignment, 4U),
+ TBAAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -708,6 +716,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
@@ -745,7 +754,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr,
ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ isNonTemporal, Alignment, TBAAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -767,7 +776,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ NVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -788,19 +798,20 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the bottom RoundWidth bits.
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
RoundVT,
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth,
TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
} else {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
@@ -809,16 +820,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
DAG.getConstant(ExtraWidth,
TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
- RoundVT, isVolatile, isNonTemporal, Alignment);
+ RoundVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
}
// The order of the stores doesn't matter.
@@ -854,7 +866,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -902,9 +914,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
"Can only promote loads to same size type");
- SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand());
RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
RChain = Res.getValue(1);
break;
@@ -924,6 +934,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
if (SrcWidth != SrcVT.getStoreSizeInBits() &&
// Some targets pretend to have an i1 loading operation, and actually
@@ -950,7 +961,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
SDValue Result =
DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
Ch = Result.getValue(1); // The chain.
@@ -987,16 +998,16 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
Chain, Ptr,
LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
+ isNonTemporal, Alignment, TBAAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1016,17 +1027,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the top RoundWidth bits.
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
+ isNonTemporal, Alignment, TBAAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1079,9 +1090,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
case TargetLowering::Expand:
if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
- LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ LD->getMemOperand());
unsigned ExtendOp;
switch (ExtType) {
case ISD::EXTLOAD:
@@ -1109,9 +1118,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Chain, Ptr, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ Chain, Ptr, SrcVT,
+ LD->getMemOperand());
SDValue ValRes;
if (ExtType == ISD::SEXTLOAD)
ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1386,11 +1394,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, Idx.getValueType()));
- if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
- Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
- else
- Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
-
+ Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
if (Op.getValueType().isVector())
@@ -1428,11 +1432,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, Idx.getValueType()));
-
- if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
- Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
- else
- Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+ Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
StackPtr);
@@ -1531,7 +1531,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
- LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+ LoadPtr,
+ DAG.getConstant(ByteOffset, LoadPtr.getValueType()));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
false, false, false, 0);
@@ -1580,10 +1581,10 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
- if (Align > StackAlign)
- SP = DAG.getNode(ISD::AND, dl, VT, SP,
- DAG.getConstant(-(uint64_t)Align, VT));
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ if (Align > StackAlign)
+ Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+ DAG.getConstant(-(uint64_t)Align, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
@@ -1595,22 +1596,44 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
}
/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
-/// condition code CC on the current target. This routine expands SETCC with
-/// illegal condition code into AND / OR of multiple SETCC values.
-void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+/// condition code CC on the current target.
+///
+/// If the SETCC has been legalized using AND / OR, then the legalized node
+/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
+/// will be set to false.
+///
+/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
+/// then the values of LHS and RHS will be swapped, CC will be set to the
+/// new condition, and NeedInvert will be set to false.
+///
+/// If the SETCC has been legalized using the inverse condcode, then LHS and
+/// RHS will be unchanged, CC will be set to the inverted condcode, and NeedInvert
+/// will be set to true. The caller must invert the result of the SETCC with
+/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a
+/// true/false result.
+///
+/// \returns true if the SetCC has been legalized, false if it hasn't.
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC,
+ bool &NeedInvert,
SDLoc dl) {
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
case TargetLowering::Expand: {
+ ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(InvCC);
+ return true;
+ }
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
- ISD::CondCode InvCC = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
@@ -1650,18 +1673,21 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
+      // We only support using the inverted operation, which is computed above;
+      // we do not support expanding these cases in any other way.
+ llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETNE:
case ISD::SETEQ:
- InvCC = ISD::getSetCCSwappedOperands(CCCode);
- if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
- // We only support using the inverted operation and not a
- // different manner of supporting expanding these cases.
- llvm_unreachable("Don't know how to expand this condition!");
+ // Try inverting the result of the inverse condition.
+ InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
+ if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ return true;
}
- LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
- RHS = SDValue();
- CC = SDValue();
- return;
+ // If inverting the condition didn't work then we have no means to expand
+ // the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
}
SDValue SetCC1, SetCC2;
@@ -1678,9 +1704,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
- break;
+ return true;
}
}
+ return false;
}
/// EmitStackConvert - Emit a store/load combination to the stack. This stores
@@ -1969,7 +1996,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
@@ -1987,7 +2014,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
@@ -2002,7 +2029,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
@@ -2049,7 +2076,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
bool isSigned = Opcode == ISD::SDIVREM;
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
@@ -2106,7 +2133,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = RTLIB::SINCOS_F32; break;
case MVT::f64: LC = RTLIB::SINCOS_F64; break;
@@ -2156,7 +2183,7 @@ void
SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
RTLIB::Libcall LC;
- switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = RTLIB::SINCOS_F32; break;
case MVT::f64: LC = RTLIB::SINCOS_F64; break;
@@ -2232,11 +2259,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
// word offset constant for Hi/Lo address computation
- SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+ SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType());
// set up Hi and Lo (into buffer) address based on endian
SDValue Hi = StackSlot;
- SDValue Lo = DAG.getNode(ISD::ADD, dl,
- TLI.getPointerTy(), StackSlot, WordOff);
+ SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
+ StackSlot, WordOff);
if (TLI.isLittleEndian())
std::swap(Hi, Lo);
@@ -2382,7 +2409,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// as a negative number. To counteract this, the dynamic code adds an
// offset depending on the data type.
uint64_t FF;
- switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ switch (Op0.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
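The constants in this table are easier to audit once you notice that each one is simply 2^N stored as an IEEE-754 single: a zero mantissa under a biased exponent of 127 + N. A minimal standalone check (plain C++, not part of the patch):

// Illustration only: reproduce the fudge-factor bit patterns used above.
// 2^N as a single-precision float is (127 + N) << 23 (sign 0, mantissa 0).
#include <cstdint>
#include <cstdio>

static uint32_t powerOfTwoAsFloatBits(unsigned N) {
  return (127u + N) << 23;
}

int main() {
  std::printf("2^8  -> 0x%08X\n", (unsigned)powerOfTwoAsFloatBits(8));  // 0x43800000
  std::printf("2^16 -> 0x%08X\n", (unsigned)powerOfTwoAsFloatBits(16)); // 0x47800000
}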
@@ -2395,7 +2422,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+ CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
Alignment = std::min(Alignment, 4u);
SDValue FudgeInReg;
if (DestVT == MVT::f32)
@@ -2656,6 +2683,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
}
break;
case ISD::ATOMIC_CMP_SWAP:
@@ -2665,6 +2693,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
}
break;
case ISD::ATOMIC_LOAD_ADD:
@@ -2674,6 +2703,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
}
break;
case ISD::ATOMIC_LOAD_SUB:
@@ -2683,6 +2713,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
}
break;
case ISD::ATOMIC_LOAD_AND:
@@ -2692,6 +2723,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
}
break;
case ISD::ATOMIC_LOAD_OR:
@@ -2701,6 +2733,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
}
break;
case ISD::ATOMIC_LOAD_XOR:
@@ -2710,6 +2743,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
}
break;
case ISD::ATOMIC_LOAD_NAND:
@@ -2719,6 +2753,47 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_MAX:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_UMAX:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_MIN:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_UMIN:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break;
}
break;
}
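The added i128 cases extend an existing pattern rather than introducing a new one: the atomic expansion selects a __sync_* runtime routine purely by operand width, and the _16 suffix is the 16-byte sibling of the 1/2/4/8 variants already handled. A rough standalone sketch of that width-to-name mapping (the helper and naming here are illustrative, not LLVM's RTLIB machinery):

// Sketch only: map an operation name and bit width to a __sync_* symbol,
// mirroring how the switches above index libcalls by operand size.
#include <cstdio>
#include <string>

static std::string syncLibcallName(const std::string &Op, unsigned Bits) {
  switch (Bits) {
  case 8:   return "__sync_" + Op + "_1";
  case 16:  return "__sync_" + Op + "_2";
  case 32:  return "__sync_" + Op + "_4";
  case 64:  return "__sync_" + Op + "_8";
  case 128: return "__sync_" + Op + "_16";   // the newly covered width
  default:  return "<unsupported>";
  }
}

int main() {
  std::printf("%s\n", syncLibcallName("fetch_and_add", 128).c_str());
  std::printf("%s\n", syncLibcallName("val_compare_and_swap", 128).c_str());
}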
@@ -2730,6 +2805,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ bool NeedInvert;
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
@@ -2947,20 +3023,20 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (Align > TLI.getMinStackArgumentAlignment()) {
assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
- VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
DAG.getConstant(Align - 1,
- TLI.getPointerTy()));
+ VAList.getValueType()));
- VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+ VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
DAG.getConstant(-(int64_t)Align,
- TLI.getPointerTy()));
+ VAList.getValueType()));
}
// Increment the pointer, VAList, to the next vaarg
- Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
DAG.getConstant(TLI.getDataLayout()->
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
- TLI.getPointerTy()));
+ VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
MachinePointerInfo(V), false, false, 0);
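The VAARG change keeps the usual align-up idiom but performs the arithmetic in VAList's own value type rather than the target pointer type: add Align - 1, then mask with -Align. A small self-contained sketch of that arithmetic, assuming a power-of-two alignment:

// Align-up idiom used above: (Addr + Align - 1) & -Align rounds Addr up to
// the next multiple of Align when Align is a power of two.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t alignUp(uint64_t Addr, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "Align must be a power of 2");
  return (Addr + Align - 1) & ~(Align - 1);   // same bits as & -(int64_t)Align
}

int main() {
  std::printf("%llu\n", (unsigned long long)alignUp(13, 8)); // 16
  std::printf("%llu\n", (unsigned long long)alignUp(16, 8)); // 16
}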
@@ -3231,6 +3307,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128));
break;
+ case ISD::FROUND:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
+ break;
case ISD::FPOWI:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
RTLIB::POWI_F80, RTLIB::POWI_F128,
@@ -3565,9 +3648,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
- Index = DAG.getNode(ISD::MUL, dl, PTy,
- Index, DAG.getConstant(EntrySize, PTy));
- SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(),
+ Index, DAG.getConstant(EntrySize, Index.getValueType()));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
+ Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
@@ -3611,10 +3695,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
Tmp3 = Node->getOperand(2);
- LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+ bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, NeedInvert, dl);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (Tmp3.getNode())
+ Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3);
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0));
- // If we expanded the SETCC into an AND/OR, return the new node
- if (Tmp2.getNode() == 0) {
Results.push_back(Tmp1);
break;
}
@@ -3645,14 +3740,52 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp4 = Node->getOperand(3); // False
SDValue CC = Node->getOperand(4);
- LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
- Tmp1, Tmp2, CC, dl);
+ bool Legalized = false;
+ // Try to legalize by inverting the condition. This is for targets that
+ // might support an ordered version of a condition, but not the unordered
+ // version (or vice versa).
+ ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ Tmp1.getValueType().isInteger());
+ if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
+ // Use the new condition code and swap true and false
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+ } else {
+ // If the inverse is not legal, then try to swap the arguments using
+ // the inverse condition code.
+ ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
+ if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
+ // The swapped inverse condition is legal, so swap true and false,
+ // lhs and rhs.
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+ }
+ }
+
+ if (!Legalized) {
+ Legalized = LegalizeSetCCCondCode(
+ getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
+ dl);
+
+ assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+
+ // If we expanded the SETCC by inverting the condition code, then swap
+ // the True/False operands to match.
+ if (NeedInvert)
+ std::swap(Tmp3, Tmp4);
- assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
- Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
- CC = DAG.getCondCode(ISD::SETNE);
- Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
- Tmp3, Tmp4, CC);
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SELECT_CC node.
+ if (CC.getNode()) {
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3, Tmp4, CC);
+ } else {
+ Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, Tmp4, CC);
+ }
+ }
Results.push_back(Tmp1);
break;
}
@@ -3662,14 +3795,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(3); // RHS
Tmp4 = Node->getOperand(1); // CC
- LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()),
- Tmp2, Tmp3, Tmp4, dl);
-
- assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
- Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
- Tmp4 = DAG.getCondCode(ISD::SETNE);
- Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
- Tmp3, Node->getOperand(4));
+ bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
+ Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
+ (void)Legalized;
+ assert(Legalized && "Can't legalize BR_CC with legal condition!");
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0));
+
+ // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
+ // node.
+ if (Tmp4.getNode()) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp4, Tmp2, Tmp3, Node->getOperand(4));
+ } else {
+ Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+ Tmp3, Node->getOperand(4));
+ }
Results.push_back(Tmp1);
break;
}
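Taken together, the SETCC, SELECT_CC and BR_CC changes adopt one contract: LegalizeSetCCCondCode reports success through its return value and may ask the caller, via NeedInvert, to negate the result, while a swapped or rewritten condition comes back through the CC operand. Each caller then repairs the result in the cheapest way for its node: a NOT for SETCC, swapped true/false operands for SELECT_CC, an inverted condition for BR_CC. A deliberately simplified, self-contained sketch of that contract (the enum and helpers are illustrative, not LLVM's):

// Simplified sketch of the new contract. The legalizer either rewrites the
// comparison directly or sets NeedInvert so the caller can negate the
// final i1 result in whatever way suits its node.
#include <cstdio>

enum Cond { LT, GE };                         // pretend only LT is "legal"
static bool isCondLegal(Cond C) { return C == LT; }
static Cond invert(Cond C) { return C == LT ? GE : LT; }

static bool legalizeCond(Cond &C, bool &NeedInvert) {
  NeedInvert = false;
  if (isCondLegal(C))
    return true;
  if (isCondLegal(invert(C))) {               // use the inverse condition
    C = invert(C);
    NeedInvert = true;                        // caller must negate the result
    return true;
  }
  return false;                               // caller has to expand further
}

int main() {
  Cond C = GE;
  bool NeedInvert;
  if (legalizeCond(C, NeedInvert))
    std::printf("compare with %s%s\n", C == LT ? "LT" : "GE",
                NeedInvert ? ", then invert the i1 result" : "");
}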
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index cea0b02..ecf4c5d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
@@ -160,7 +161,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
@@ -172,7 +173,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
RTLIB::CEIL_F80,
RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
@@ -226,7 +227,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
RTLIB::COS_F80,
RTLIB::COS_F128,
RTLIB::COS_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
@@ -239,7 +240,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
@@ -251,7 +252,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
RTLIB::EXP_F80,
RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
@@ -263,7 +264,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_F80,
RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
@@ -275,7 +276,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
RTLIB::FLOOR_F80,
RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
@@ -287,7 +288,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
RTLIB::LOG_F80,
RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
@@ -299,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
RTLIB::LOG2_F80,
RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
@@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
RTLIB::LOG10_F80,
RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -325,7 +326,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- NVT, Ops, 3, false, SDLoc(N));
+ NVT, Ops, 3, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
@@ -338,7 +339,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
@@ -350,7 +351,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
RTLIB::NEARBYINT_F80,
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
@@ -364,7 +365,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -372,7 +373,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -381,7 +382,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
- SDLoc(N));
+ SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
@@ -402,7 +403,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_F80,
RTLIB::POW_F128,
RTLIB::POW_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -416,7 +417,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
RTLIB::POWI_F80,
RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
@@ -429,7 +430,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
RTLIB::REM_F80,
RTLIB::REM_F128,
RTLIB::REM_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
@@ -441,7 +442,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
RTLIB::RINT_F80,
RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128),
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
@@ -453,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
RTLIB::SIN_F80,
RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
@@ -465,7 +478,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
RTLIB::SQRT_F80,
RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
@@ -478,7 +491,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, SDLoc(N));
+ NVT, Ops, 2, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -490,7 +503,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
RTLIB::TRUNC_F80,
RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
- NVT, &Op, 1, false, SDLoc(N));
+ NVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
@@ -504,7 +517,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
L->getPointerInfo(), NVT, L->isVolatile(),
- L->isNonTemporal(), false, L->getAlignment());
+ L->isNonTemporal(), false, L->getAlignment(),
+ L->getTBAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -516,7 +530,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getMemoryVT(), dl, L->getChain(),
L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
- L->isNonTemporal(), false, L->getAlignment());
+ L->isNonTemporal(), false, L->getAlignment(),
+ L->getTBAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -585,7 +600,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, false, dl);
+ &Op, 1, false, dl).first;
}
@@ -645,7 +660,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -676,7 +691,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
@@ -684,14 +699,14 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
@@ -754,9 +769,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
Val = GetSoftenedFloat(Val);
return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
- ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(),
- ST->getAlignment());
+ ST->getMemOperand());
}
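SoftenFloatOp_STORE above is one instance of a pattern repeated throughout this patch: instead of re-listing volatile, non-temporal and alignment flags at every call site, the rebuilt load or store forwards the node's memory operand, so a newly added attribute (such as the TBAA tag threaded through elsewhere in this diff) cannot be dropped by accident. A conceptual sketch of why passing the descriptor as one unit is safer (plain C++, not the SelectionDAG API):

// Conceptual only: carrying the memory attributes as one descriptor means
// call sites stay correct when a new field (here, a TBAA tag) is added.
#include <cstdio>

struct MemOpInfo {                 // stand-in for MachineMemOperand
  bool Volatile;
  bool NonTemporal;
  unsigned Alignment;
  const void *TBAATag;             // the newly threaded-through field
};

static void buildStore(const MemOpInfo &MMO) {
  std::printf("store: vol=%d align=%u tbaa=%p\n",
              MMO.Volatile, MMO.Alignment, MMO.TBAATag);
}

int main() {
  MemOpInfo MMO = {false, false, 8, 0};
  buildStore(MMO);                 // forward the whole descriptor
}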
@@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
@@ -912,7 +926,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
N->getValueType(0), Ops, 2, false,
- SDLoc(N));
+ SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -986,7 +1000,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
N->getValueType(0), Ops, 3, false,
- SDLoc(N));
+ SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1000,7 +1014,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
N->getValueType(0), Ops, 2, false,
- SDLoc(N));
+ SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
GetPairElements(Call, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
@@ -1102,7 +1128,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
N->getValueType(0), Ops, 2, false,
- SDLoc(N));
+ SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1134,8 +1160,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
- LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->getMemoryVT(), LD->getMemOperand());
// Remember the chain.
Chain = Hi.getValue(1);
@@ -1181,7 +1206,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl);
+ Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first;
GetPairElements(Hi, Lo, Hi);
}
@@ -1251,6 +1276,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break;
case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
@@ -1325,6 +1351,17 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
N->getOperand(4)), 0);
}
+SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(1), Lo, Hi);
+ // The ppcf128 value is providing only the sign; take it from the
+ // higher-order double (which must have the larger magnitude).
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N),
+ N->getValueType(0), N->getOperand(0), Hi);
+}
+
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
@@ -1353,7 +1390,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl);
+ return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1386,7 +1423,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
- false, dl);
+ false, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
@@ -1445,7 +1482,5 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
GetExpandedOp(ST->getValue(), Lo, Hi);
return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr,
- ST->getPointerInfo(),
- ST->getMemoryVT(), ST->isVolatile(),
- ST->isNonTemporal(), ST->getAlignment());
+ ST->getMemoryVT(), ST->getMemOperand());
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index ff8f1f9..4255948 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -417,9 +417,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
SDLoc dl(N);
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
- N->getPointerInfo(),
- N->getMemoryVT(), N->isVolatile(),
- N->isNonTemporal(), N->getAlignment());
+ N->getMemoryVT(), N->getMemOperand());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -919,7 +917,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// type does not have a strange size (eg: it is not i1).
EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
- assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+ assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
+ "Legal vector of one illegal element?");
// Promote the inserted value. The type does not need to match the
// vector element type. Check that any extra bits introduced will be
@@ -1037,17 +1036,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
- unsigned Alignment = N->getAlignment();
- bool isVolatile = N->isVolatile();
- bool isNonTemporal = N->isNonTemporal();
SDLoc dl(N);
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
// Truncate the value and store the result.
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
- N->getMemoryVT(),
- isVolatile, isNonTemporal, Alignment);
+ return DAG.getTruncStore(Ch, dl, Val, Ptr,
+ N->getMemoryVT(), N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
@@ -1193,6 +1188,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
}
break;
case ISD::ATOMIC_CMP_SWAP:
@@ -1202,6 +1198,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
}
break;
case ISD::ATOMIC_LOAD_ADD:
@@ -1211,6 +1208,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
}
break;
case ISD::ATOMIC_LOAD_SUB:
@@ -1220,6 +1218,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
}
break;
case ISD::ATOMIC_LOAD_AND:
@@ -1229,6 +1228,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
}
break;
case ISD::ATOMIC_LOAD_OR:
@@ -1238,6 +1238,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
}
break;
case ISD::ATOMIC_LOAD_XOR:
@@ -1247,6 +1248,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
}
break;
case ISD::ATOMIC_LOAD_NAND:
@@ -1256,6 +1258,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
}
break;
}
@@ -1770,7 +1773,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl),
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/,
+ dl).first,
Lo, Hi);
}
@@ -1781,7 +1785,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl),
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/,
+ dl).first,
Lo, Hi);
}
@@ -1803,6 +1808,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
bool isInvariant = N->isInvariant();
+ const MDNode *TBAAInfo = N->getTBAAInfo();
SDLoc dl(N);
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1811,7 +1817,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
- MemVT, isVolatile, isNonTemporal, Alignment);
+ MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -1833,7 +1839,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -1842,11 +1849,11 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1864,17 +1871,17 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1997,7 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl),
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/,
+ dl).first,
Lo, Hi);
}
@@ -2060,7 +2068,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -2155,7 +2163,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo,
+ Hi);
return;
}
@@ -2238,7 +2247,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -2378,7 +2387,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -2398,7 +2407,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -2685,7 +2694,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N));
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2702,6 +2711,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
+ const MDNode *TBAAInfo = N->getTBAAInfo();
SDLoc dl(N);
SDValue Lo, Hi;
@@ -2711,7 +2721,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), isVolatile, isNonTemporal,
- Alignment);
+ Alignment, TBAAInfo);
}
if (TLI.isLittleEndian()) {
@@ -2719,7 +2729,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetExpandedInteger(N->getValue(), Lo, Hi);
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2728,11 +2738,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2760,17 +2770,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Store both the high bits and maybe some of the low bits.
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
- HiVT, isVolatile, isNonTemporal, Alignment);
+ HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2835,7 +2845,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet,
Zero, Four);
unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
- FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(),
+ FudgePtr, Offset);
Alignment = std::min(Alignment, 4u);
// Load the value out, extending it from f32 to the destination float type.
@@ -2852,7 +2863,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl);
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index fd770d1..eb13230 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -958,20 +958,6 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
return SDValue(N->getOperand(ResNo));
}
-/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
-/// which is split into two not necessarily identical pieces.
-void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
- // Currently all types are split in half.
- if (!InVT.isVector()) {
- LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
- } else {
- unsigned NumElements = InVT.getVectorNumElements();
- assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), NumElements/2);
- }
-}
-
/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
/// high parts of the given value.
void DAGTypeLegalizer::GetPairElements(SDValue Pair,
@@ -988,10 +974,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
SDValue Index) {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
- if (Index.getValueType().bitsGT(TLI.getPointerTy()))
- Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
- else
- Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+ Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy());
// Calculate the element offset and add it to the pointer.
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
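getZExtOrTrunc collapses the old truncate-or-zero-extend branch into one call: truncate when the index is wider than the pointer type, zero-extend otherwise. A behavioural sketch in plain integer arithmetic:

// Behavioural sketch of zext-or-trunc (not the SelectionDAG API): narrow
// by masking when the source is wider, otherwise the value is unchanged.
#include <cstdint>
#include <cstdio>

static uint64_t zextOrTrunc(uint64_t Value, unsigned FromBits, unsigned ToBits) {
  if (FromBits > ToBits)                            // "TRUNCATE"
    return Value & ((1ull << ToBits) - 1);
  return Value;                                     // "ZERO_EXTEND"
}

int main() {
  std::printf("0x%llx\n", (unsigned long long)zextOrTrunc(0x1ABCD, 32, 16)); // 0xabcd
  std::printf("0x%llx\n", (unsigned long long)zextOrTrunc(0xABCD, 16, 32));  // 0xabcd
}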
@@ -1024,20 +1007,23 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned,
+ dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned,
+ dl).first;
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned,
+ dl).first;
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
return TLI.makeLibCall(DAG, LC, N->getValueType(0),
- &Ops[0], NumOps, isSigned, dl);
+ &Ops[0], NumOps, isSigned, dl).first;
}
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 63e9af3..13bb08f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -410,6 +410,7 @@ private:
SDValue SoftenFloatRes_FPOWI(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FROUND(SDNode *N);
SDValue SoftenFloatRes_FSIN(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
@@ -470,6 +471,7 @@ private:
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -480,6 +482,7 @@ private:
// Float Operand Expansion.
bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
@@ -534,7 +537,7 @@ private:
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
- SDValue ScalarizeVecOp_EXTEND(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -558,6 +561,7 @@ private:
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -628,6 +632,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
@@ -699,10 +704,6 @@ private:
GetExpandedFloat(Op, Lo, Hi);
}
- /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
- /// which is split (or expanded) into two not necessarily identical pieces.
- void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
-
/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
/// high parts of the given value.
void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
@@ -730,6 +731,12 @@ private:
GetExpandedFloat(Op, Lo, Hi);
}
+
+ /// This function will split the integer \p Op into \p NumElements
+ /// operations of type \p EltVT and store them in \p Ops.
+ void IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops, EVT EltVT);
+
// Generic Result Expansion.
void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 96f6143..c749fde 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -77,13 +77,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypeWidenVector: {
assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- InVT.getVectorNumElements()/2);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getConstant(InNVT.getVectorNumElements(),
- TLI.getVectorIdxTy()));
+ EVT LoVT, HiVT;
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
+ llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
@@ -169,7 +165,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize,
+ StackPtr.getValueType()));
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
@@ -253,20 +250,22 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -307,6 +306,25 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Generic Operand Expansion.
//===--------------------------------------------------------------------===//
+void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops,
+ EVT EltVT) {
+ assert(Op.getValueType().isInteger());
+ SDLoc DL(Op);
+ SDValue Parts[2];
+
+ if (NumElements > 1) {
+ NumElements >>= 1;
+ SplitInteger(Op, Parts[0], Parts[1]);
+ if (TLI.isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+ IntegerToVector(Parts[0], NumElements, Ops, EltVT);
+ IntegerToVector(Parts[1], NumElements, Ops, EltVT);
+ } else {
+ Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op));
+ }
+}
+
SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0).isVector()) {
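IntegerToVector recursively halves both the integer and the requested element count with SplitInteger, pushing the low half first (the two halves are swapped up front on big-endian targets) and bitcasting each leaf to the element type. A self-contained sketch of the same recursion on ordinary integers:

// Sketch of the recursive split: halve until the element width is reached,
// collecting pieces low-half first (little-endian order).
#include <cstdint>
#include <cstdio>
#include <vector>

static void integerToPieces(uint64_t Value, unsigned Bits, unsigned EltBits,
                            std::vector<uint64_t> &Pieces) {
  if (Bits > EltBits) {
    unsigned Half = Bits / 2;
    uint64_t LoMask = (1ull << Half) - 1;
    integerToPieces(Value & LoMask, Half, EltBits, Pieces);  // low half first
    integerToPieces(Value >> Half, Half, EltBits, Pieces);   // then high half
  } else {
    Pieces.push_back(Value);
  }
}

int main() {
  std::vector<uint64_t> Pieces;
  integerToPieces(0x1122334455667788ull, 64, 16, Pieces);
  for (unsigned i = 0; i < Pieces.size(); ++i)
    std::printf("0x%04llx ", (unsigned long long)Pieces[i]);  // 7788 5566 3344 1122
  std::printf("\n");
}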
@@ -315,21 +333,27 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
// instead, but only if the new vector type is legal (otherwise there
// is no point, and it might create expansion loops). For example, on
// x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ //
+ // FIXME: I'm not sure why we are first trying to split the input into
+ // a 2 element vector, so I'm leaving it here to maintain the current
+ // behavior.
+ unsigned NumElts = 2;
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = EVT::getVectorVT(*DAG.getContext(),
TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
- 2);
-
- if (isTypeLegal(NVT)) {
- SDValue Parts[2];
- GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+ NumElts);
+ if (!isTypeLegal(NVT)) {
+ // If we can't find a legal type by splitting the integer in half,
+ // then we can use the node's value type.
+ NumElts = N->getValueType(0).getVectorNumElements();
+ NVT = N->getValueType(0);
+ }
- if (TLI.isBigEndian())
- std::swap(Parts[0], Parts[1]);
+ SmallVector<SDValue, 8> Ops;
+ IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
- return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
- }
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
// Otherwise, store to a temporary and load out again as the new type.
@@ -439,6 +463,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
bool isNonTemporal = St->isNonTemporal();
+ const MDNode *TBAAInfo = St->getTBAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -450,15 +475,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
- assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize), TBAAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -489,14 +513,12 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
SDValue Cond = N->getOperand(0);
CL = CH = Cond;
if (Cond.getValueType().isVector()) {
- assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
- "Condition legalized before result?");
- unsigned NumElements = Cond.getValueType().getVectorNumElements();
- EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
- CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
- DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy()));
+ // Check if there are already split versions of the vector available and
+ // use those instead of splitting the mask operand again.
+ if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Cond, CL, CH);
+ else
+ llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
@@ -518,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getUNDEF(LoVT);
Hi = DAG.getUNDEF(HiVT);
}
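For reference, the IntegerToVector helper introduced above recursively halves the expanded integer (via SplitInteger) until single elements remain, swapping the halves on big-endian targets. A minimal standalone sketch of the same halving pattern, operating on a plain uint64_t rather than SDValues (function name, element widths, and the <= 64-bit assumption are purely illustrative, not the LLVM API):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Toy model of the halving pattern: split the value in two until each piece
// is one element wide, swapping the halves on big-endian targets so lane 0
// comes out first. Assumes NumElements * EltBits <= 64.
static void integerToLanes(uint64_t Val, unsigned NumElements, unsigned EltBits,
                           bool BigEndian, std::vector<uint64_t> &Ops) {
  if (NumElements > 1) {
    NumElements >>= 1;
    unsigned HalfBits = NumElements * EltBits;
    uint64_t Lo = Val & ((1ULL << HalfBits) - 1);
    uint64_t Hi = Val >> HalfBits;
    if (BigEndian)
      std::swap(Lo, Hi);
    integerToLanes(Lo, NumElements, EltBits, BigEndian, Ops);
    integerToLanes(Hi, NumElements, EltBits, BigEndian, Ops);
  } else {
    Ops.push_back(Val); // single element: just "bitcast" it
  }
}

int main() {
  std::vector<uint64_t> Lanes;
  integerToLanes(0x0102030405060708ULL, 4, 16, /*BigEndian=*/false, Lanes);
  for (unsigned i = 0; i < Lanes.size(); ++i)
    std::printf("0x%04llx ", (unsigned long long)Lanes[i]);
  std::printf("\n"); // 0x0708 0x0506 0x0304 0x0102
  return 0;
}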
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index bbe11b8..2c3cdcc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -171,7 +171,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
case TargetLowering::Custom:
Changed = true;
- return LegalizeOp(TLI.LowerOperation(Result, DAG));
+ return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG));
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandStore(Op));
@@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_TO_UINT:
case ISD::FNEG:
case ISD::FABS:
+ case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
@@ -241,6 +242,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
+ case ISD::FROUND:
case ISD::FFLOOR:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
@@ -416,7 +418,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment(),
+ LD->getTBAAInfo());
} else {
EVT LoadVT = WideVT;
while (RemainingBytes < LoadBytes) {
@@ -426,13 +429,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LoadVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(), LD->getAlignment(),
+ LD->getTBAAInfo());
}
RemainingBytes -= LoadBytes;
Offset += LoadBytes;
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getIntPtrConstant(LoadBytes));
+ DAG.getConstant(LoadBytes, BasePTR.getValueType()));
LoadVals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -497,10 +501,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->getAlignment(), LD->getTBAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getIntPtrConstant(Stride));
+ DAG.getConstant(Stride, BasePTR.getValueType()));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -529,6 +533,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
unsigned NumElem = StVT.getVectorNumElements();
// The type of the data we want to save
@@ -556,10 +561,10 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getIntPtrConstant(Stride));
+ DAG.getConstant(Stride, BasePTR.getValueType()));
Stores.push_back(Store);
}
@@ -597,10 +602,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
// Generate a mask operand.
- EVT MaskTy = TLI.getSetCCResultType(*DAG.getContext(), VT);
- assert(MaskTy.isVector() && "Invalid CC type");
- assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
- && "Invalid mask size");
+ EVT MaskTy = VT.changeVectorElementTypeToInteger();
// What is the size of each element in the vector mask.
EVT BitTy = MaskTy.getScalarType();
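The ExpandSELECT hunk above only changes how the mask type is chosen; the surrounding expansion (not shown in full here) blends the two operands with bitwise operations, using a per-lane mask that is either all ones or all zeros. A scalar, single-lane sketch of that blend, assuming 32-bit elements (plain C++, not LLVM code):

#include <cstdint>
#include <cstdio>

// One lane of the bitwise select expansion: widen the i1 condition to an
// all-ones/all-zeros mask of the element's integer type, then blend with
// AND/OR.
static uint32_t selectLane(bool Cond, uint32_t A, uint32_t B) {
  uint32_t Mask = Cond ? 0xFFFFFFFFu : 0u; // sign-extended condition
  return (A & Mask) | (B & ~Mask);
}

int main() {
  std::printf("%u %u\n", selectLane(true, 7, 9), selectLane(false, 7, 9)); // 7 9
  return 0;
}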
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 54380ec..f7a3e3d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
+ case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -97,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD:
case ISD::AND:
case ISD::FADD:
+ case ISD::FCOPYSIGN:
case ISD::FDIV:
case ISD::FMUL:
case ISD::FPOW:
@@ -215,7 +217,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->isInvariant(), N->getOriginalAlignment());
+ N->isInvariant(), N->getOriginalAlignment(),
+ N->getTBAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -369,7 +372,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
- Res = ScalarizeVecOp_EXTEND(N);
+ case ISD::TRUNCATE:
+ Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
@@ -408,7 +412,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
/// to be scalarized, it must be <1 x ty>. Extend the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
"Unexected vector type!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
@@ -455,12 +459,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment());
+ N->getAlignment(), N->getTBAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment());
+ N->getOriginalAlignment(), N->getTBAAInfo());
}
@@ -517,7 +521,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
- case ISD::ANY_EXTEND:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
case ISD::CTTZ:
@@ -540,21 +543,27 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
+ case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
- case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
- case ISD::ZERO_EXTEND:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ SplitVecRes_ExtendOp(N, Lo, Hi);
+ break;
+
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::FADD:
+ case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
case ISD::SDIV:
@@ -615,7 +624,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SDLoc dl(N);
SDValue InOp = N->getOperand(0);
@@ -670,7 +679,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
@@ -691,7 +700,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
}
EVT LoVT, HiVT;
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
@@ -707,7 +716,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT LoVT, HiVT;
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
@@ -731,7 +740,8 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT LoVT, HiVT;
- GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) =
+ DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
DAG.getValueType(LoVT));
@@ -783,7 +793,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, StackPtr.getValueType()));
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
@@ -794,7 +804,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
Hi = DAG.getUNDEF(HiVT);
}
@@ -804,7 +814,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
EVT LoVT, HiVT;
SDLoc dl(LD);
- GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Ch = LD->getChain();
@@ -815,20 +825,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
EVT LoMemVT, HiMemVT;
- GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+ llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- isInvariant, Alignment);
+ isInvariant, Alignment, TBAAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment);
+ HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -847,24 +859,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc DL(N);
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the input.
- EVT InVT = N->getOperand(0).getValueType();
SDValue LL, LH, RL, RH;
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
- DAG.getConstant(InNVT.getVectorNumElements(),
- TLI.getVectorIdxTy()));
-
- RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
- DAG.getConstant(InNVT.getVectorNumElements(),
- TLI.getVectorIdxTy()));
+ llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -875,22 +875,15 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// Get the dest types - they may not match the input types, e.g. int_to_fp.
EVT LoVT, HiVT;
SDLoc dl(N);
- GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
- if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
GetSplitVector(N->getOperand(0), Lo, Hi);
- } else {
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
- DAG.getConstant(InNVT.getVectorNumElements(),
- TLI.getVectorIdxTy()));
- }
+ else
+ llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
@@ -913,6 +906,58 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
}
}
+void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DestVT = N->getValueType(0);
+ EVT LoVT, HiVT;
+ llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+
+ // We can do better than a generic split operation if the extend is doing
+ // more than just doubling the width of the elements and the following are
+ // true:
+ // - The number of vector elements is even,
+ // - the source type is legal,
+ // - the type of a split source is illegal,
+ // - the type of an extended (by doubling element size) source is legal, and
+ // - the type of that extended source when split is legal.
+ //
+ // This won't necessarily completely legalize the operation, but it will
+ // more effectively move in the right direction and prevent falling down
+ // to scalarization in many cases due to the input vector being split too
+ // far.
+ unsigned NumElements = SrcVT.getVectorNumElements();
+ if ((NumElements & 1) == 0 &&
+ SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT NewSrcVT = EVT::getVectorVT(
+ Ctx, EVT::getIntegerVT(
+ Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
+ NumElements);
+ EVT SplitSrcVT =
+ EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
+ EVT SplitLoVT, SplitHiVT;
+ llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+ if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
+ TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
+ DEBUG(dbgs() << "Split vector extend via incremental extend:";
+ N->dump(&DAG); dbgs() << "\n");
+ // Extend the source vector by one step.
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ // Get the low and high halves of the new, one-step-extended vector.
+ llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ // Extend those vector halves the rest of the way.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ return;
+ }
+ }
+ // Fall back to the generic unary operator splitting otherwise.
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+}
+
void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
SDValue &Lo, SDValue &Hi) {
// The low and high parts of the original input give four input vectors.
@@ -1105,41 +1150,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
SDValue Mask = N->getOperand(0);
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
+ EVT Src0VT = Src0.getValueType();
SDLoc DL(N);
- EVT MaskVT = Mask.getValueType();
- assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+ assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
assert(Lo.getValueType() == Hi.getValueType() &&
"Lo and Hi have differing types");
- unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
- unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
- assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
-
- LLVMContext &Ctx = *DAG.getContext();
- SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy());
- SDValue LoElts = DAG.getConstant(LoNumElts, TLI.getVectorIdxTy());
- EVT Src0VT = Src0.getValueType();
- EVT Src0EltTy = Src0VT.getVectorElementType();
- EVT MaskEltTy = MaskVT.getVectorElementType();
-
- EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
- EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
- EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
- EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
+ EVT LoOpVT, HiOpVT;
+ llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
+ assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
- SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
- SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
-
- SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
- SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
-
- SDValue LoMask =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
- SDValue HiMask =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+ SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
+ llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
+ llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
+ llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
SDValue LoSelect =
DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
@@ -1249,33 +1276,34 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
bool isNT = N->isNonTemporal();
+ const MDNode *TBAAInfo = N->getTBAAInfo();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
EVT LoMemVT, HiMemVT;
- GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+ llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- LoMemVT, isVol, isNT, Alignment);
+ LoMemVT, isVol, isNT, Alignment, TBAAInfo);
else
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- isVol, isNT, Alignment);
+ isVol, isNT, Alignment, TBAAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVol, isNT, Alignment);
+ HiMemVT, isVol, isNT, Alignment, TBAAInfo);
else
Hi = DAG.getStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- isVol, isNT, Alignment);
+ isVol, isNT, Alignment, TBAAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
@@ -1341,13 +1369,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
SDLoc DL(N);
// Extract the halves of the input via extract_subvector.
- EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), NumElements/2);
- SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
- DAG.getConstant(NumElements/2,
- TLI.getVectorIdxTy()));
+ SDValue InLoVec, InHiVec;
+ llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
// Truncate them to 1/2 the element size.
EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
@@ -1446,27 +1469,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
+
case ISD::ADD:
case ISD::AND:
case ISD::BSWAP:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SUB:
+ case ISD::XOR:
+ Res = WidenVecRes_Binary(N);
+ break;
+
case ISD::FADD:
case ISD::FCOPYSIGN:
- case ISD::FDIV:
case ISD::FMUL:
case ISD::FPOW:
- case ISD::FREM:
case ISD::FSUB:
- case ISD::MUL:
- case ISD::MULHS:
- case ISD::MULHU:
- case ISD::OR:
+ case ISD::FDIV:
+ case ISD::FREM:
case ISD::SDIV:
- case ISD::SREM:
case ISD::UDIV:
+ case ISD::SREM:
case ISD::UREM:
- case ISD::SUB:
- case ISD::XOR:
- Res = WidenVecRes_Binary(N);
+ Res = WidenVecRes_BinaryCanTrap(N);
break;
case ISD::FPOWI:
@@ -1507,6 +1534,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FRINT:
+ case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -1534,6 +1562,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
+ // Binary op widening for operations that can trap.
unsigned Opcode = N->getOpcode();
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -2532,6 +2569,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2541,7 +2579,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Align);
+ isVolatile, isNonTemporal, isInvariant, Align,
+ TBAAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2577,7 +2616,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = NewVTWidth / 8;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getIntPtrConstant(Increment));
+ DAG.getConstant(Increment, BasePtr.getValueType()));
SDValue L;
if (LdWidth < NewVTWidth) {
@@ -2586,7 +2625,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset), isVolatile,
- isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ isNonTemporal, isInvariant, MinAlign(Align, Increment),
+ TBAAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector()) {
SmallVector<SDValue, 16> Loads;
@@ -2602,7 +2642,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset), isVolatile,
- isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ isNonTemporal, isInvariant, MinAlign(Align, Increment),
+ TBAAInfo);
LdChain.push_back(L.getValue(1));
}
@@ -2682,6 +2723,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ const MDNode *TBAAInfo = LD->getTBAAInfo();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
@@ -2693,15 +2735,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
LD->getPointerInfo(),
- LdEltVT, isVolatile, isNonTemporal, Align);
+ LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr, DAG.getIntPtrConstant(Offset));
+ BasePtr,
+ DAG.getConstant(Offset,
+ BasePtr.getValueType()));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
- isVolatile, isNonTemporal, Align);
+ isVolatile, isNonTemporal, Align, TBAAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -2724,6 +2768,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -2750,12 +2795,12 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
- MinAlign(Align, Offset)));
+ MinAlign(Align, Offset), TBAAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getIntPtrConstant(Increment));
+ DAG.getConstant(Increment, BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
} else {
// Cast the vector to the scalar type we can store
@@ -2770,11 +2815,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
- MinAlign(Align, Offset)));
+ MinAlign(Align, Offset), TBAAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getIntPtrConstant(Increment));
+ DAG.getConstant(Increment, BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type
Idx = Idx * NewVTWidth / ValEltWidth;
@@ -2792,6 +2837,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
+ const MDNode *TBAAInfo = ST->getTBAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -2814,17 +2860,19 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
DAG.getConstant(0, TLI.getVectorIdxTy()));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo(), StEltVT,
- isVolatile, isNonTemporal, Align));
+ isVolatile, isNonTemporal, Align,
+ TBAAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr, DAG.getIntPtrConstant(Offset));
+ BasePtr, DAG.getConstant(Offset,
+ BasePtr.getValueType()));
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(0, TLI.getVectorIdxTy()));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
ST->getPointerInfo().getWithOffset(Offset),
StEltVT, isVolatile, isNonTemporal,
- MinAlign(Align, Offset)));
+ MinAlign(Align, Offset), TBAAInfo));
}
}
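The incremental-extend strategy added in SplitVecRes_ExtendOp above can be summarized numerically: extending v8i8 to v8i32, for example, first extends one step to v8i16, splits that into two v4i16 halves, and then extends each half to v4i32. A small sketch that prints the plan for such cases; it models only the arithmetic preconditions from the code and omits the four target-dependent legality checks (the helper name and examples are illustrative):

#include <cstdio>

// Print the type chain the incremental-extend path would use, or note the
// generic fallback. Only the arithmetic preconditions are modeled here.
static void planExtendSplit(unsigned NumElts, unsigned SrcBits,
                            unsigned DstBits) {
  if ((NumElts & 1) == 0 && SrcBits * 2 < DstBits) {
    std::printf("extend v%ui%u -> v%ui%u, split into 2 x v%ui%u, "
                "extend halves to v%ui%u\n",
                NumElts, SrcBits, NumElts, SrcBits * 2,
                NumElts / 2, SrcBits * 2, NumElts / 2, DstBits);
  } else {
    std::printf("fall back to the generic unary-op split\n");
  }
}

int main() {
  planExtendSplit(8, 8, 32);  // v8i8 -> v8i32: uses the incremental path
  planExtendSplit(8, 16, 32); // v8i16 -> v8i32: only doubles, generic split
  return 0;
}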
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index d684164..1dd2128 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -389,10 +389,9 @@ signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
-static const unsigned PriorityTwo = 100;
-static const unsigned PriorityThree = 50;
-static const unsigned PriorityFour = 15;
-static const unsigned PriorityFive = 5;
+static const unsigned PriorityTwo = 50;
+static const unsigned PriorityThree = 15;
+static const unsigned PriorityFour = 5;
static const unsigned ScaleOne = 20;
static const unsigned ScaleTwo = 10;
static const unsigned ScaleThree = 5;
@@ -449,7 +448,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
if (N->isMachineOpcode()) {
const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
if (TID.isCall())
- ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
}
else
switch (N->getOpcode()) {
@@ -457,11 +456,11 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
case ISD::TokenFactor:
case ISD::CopyFromReg:
case ISD::CopyToReg:
- ResCount += PriorityFive;
+ ResCount += PriorityFour;
break;
case ISD::INLINEASM:
- ResCount += PriorityFour;
+ ResCount += PriorityThree;
break;
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index f5fe168..1a562d7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -718,7 +718,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
// indicate the scheduled cycle.
SU->setHeightToAtLeast(CurCycle);
- // Reserve resources for the scheduled intruction.
+ // Reserve resources for the scheduled instruction.
EmitNode(SU);
Sequence.push_back(SU);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 982dcc9..054e3dd 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -690,15 +690,6 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
}
#endif // NDEBUG
-namespace {
- struct OrderSorter {
- bool operator()(const std::pair<unsigned, MachineInstr*> &A,
- const std::pair<unsigned, MachineInstr*> &B) {
- return A.first < B.first;
- }
- };
-}
-
/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
static void
ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
@@ -744,7 +735,10 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
}
MachineBasicBlock *BB = Emitter.getBlock();
- if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
+ // Fast-isel may have inserted some instructions, in which case the
+ // BB->back().isPHI() test will not fire when we want it to.
+ prior(Emitter.getInsertPos())->isPHI()) {
// Did not insert any instruction.
Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
return;
@@ -857,7 +851,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Sort the source order instructions and use the order to insert debug
// values.
- std::sort(Orders.begin(), Orders.end(), OrderSorter());
+ std::sort(Orders.begin(), Orders.end(), less_first());
SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
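The OrderSorter comparator removed above is replaced by llvm::less_first(), which simply orders pairs by their first member. A trivial standalone equivalent using plain std types and dummy data (the names here are made up for illustration):

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

typedef std::pair<unsigned, int> OrderEntry; // (IR order, dummy "instruction")

// Stand-in for llvm::less_first(): compare pairs by their first member only.
static bool lessFirst(const OrderEntry &A, const OrderEntry &B) {
  return A.first < B.first;
}

int main() {
  std::vector<OrderEntry> Orders;
  Orders.push_back(OrderEntry(3, 30));
  Orders.push_back(OrderEntry(1, 10));
  Orders.push_back(OrderEntry(2, 20));
  std::sort(Orders.begin(), Orders.end(), lessFirst);
  for (unsigned i = 0; i < Orders.size(); ++i)
    std::printf("%u ", Orders[i].first); // 1 2 3
  std::printf("\n");
  return 0;
}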
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bc6063c..45d5a4f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -869,16 +869,19 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), OptLevel(OL),
+ : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), UpdateListeners(0) {
+ Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
+ UpdateListeners(0) {
AllNodes.push_back(&EntryNode);
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) {
+void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti,
+ const TargetLowering *tli) {
MF = &mf;
TTI = tti;
+ TLI = tli;
Context = &mf.getFunction()->getContext();
}
@@ -983,6 +986,54 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
+ // In other cases the element type is illegal and needs to be expanded, for
+ // example v2i64 on MIPS32. In this case, find the nearest legal type, split
+ // the value into n parts and use a vector type with n times as many elements.
+ // Then bitcast to the type requested.
+ // Legalizing constants too early makes the DAGCombiner's job harder, so we
+ // only legalize if the DAG tells us we must produce legal types.
+ else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
+ TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypeExpandInteger) {
+ APInt NewVal = Elt->getValue();
+ EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+ unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
+ EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
+
+ // Check that the temporary vector is the correct size. If this fails then
+ // getTypeToTransformTo() probably returned a type whose size (in bits)
+ // isn't a power-of-2 factor of the requested type size.
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SmallVector<SDValue, 2> EltParts;
+ for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
+ EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
+ .trunc(ViaEltSizeInBits),
+ ViaEltVT, isT));
+ }
+
+ // EltParts is currently in little endian order. If we actually want
+ // big-endian order then reverse it now.
+ if (TLI->isBigEndian())
+ std::reverse(EltParts.begin(), EltParts.end());
+
+ // The elements must be reversed when the element order is different
+ // to the endianness of the elements (because the BITCAST is itself a
+ // vector shuffle in this situation). However, we do not need any code to
+ // perform this reversal because getConstant() is producing a vector
+ // splat.
+ // This situation occurs in MIPS MSA.
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+ Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
+
+ SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT,
+ getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT,
+ &Ops[0], Ops.size()));
+ return Result;
+ }
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
@@ -1077,9 +1128,10 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
+ const TargetLowering *TLI = TM.getTargetLowering();
// Truncate (with sign-extension) the offset value to the pointer size.
- unsigned BitWidth = TM.getTargetLowering()->getPointerTy().getSizeInBits();
+ unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType());
if (BitWidth < 64)
Offset = SignExtend64(Offset, BitWidth);
@@ -1298,11 +1350,8 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
SDValue N2, const int *Mask) {
- assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
- assert(VT.isVector() && N1.getValueType().isVector() &&
- "Vector Shuffle VTs must be a vectors");
- assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
- && "Vector Shuffle VTs must have same element type");
+ assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+ "Invalid VECTOR_SHUFFLE");
// Canonicalize shuffle undef, undef -> undef
if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
@@ -1351,17 +1400,13 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
commuteShuffle(N1, N2, MaskVec);
}
- // If Identity shuffle, or all shuffle in to undef, return that node.
- bool AllUndef = true;
+ // If Identity shuffle return that node.
bool Identity = true;
for (unsigned i = 0; i != NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
- if (MaskVec[i] >= 0) AllUndef = false;
}
- if (Identity && NElts == N1.getValueType().getVectorNumElements())
+ if (Identity && NElts)
return N1;
- if (AllUndef)
- return getUNDEF(VT);
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
@@ -1380,7 +1425,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
ShuffleVectorSDNode *N =
- new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), N1, N2, MaskAlloc);
+ new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(),
+ dl.getDebugLoc(), N1, N2,
+ MaskAlloc);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1403,8 +1450,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), Ops, 5,
- Code);
+ CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(),
+ dl.getDebugLoc(),
+ Ops, 5, Code);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1447,7 +1495,8 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), dl.getDebugLoc(), Root, Label);
+ SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(),
+ dl.getDebugLoc(), Root, Label);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1510,6 +1559,26 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
return SDValue(N, 0);
}
+/// getAddrSpaceCast - Return an AddrSpaceCastSDNode.
+SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
+ unsigned SrcAS, unsigned DestAS) {
+ SDValue Ops[] = {Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1);
+ ID.AddInteger(SrcAS);
+ ID.AddInteger(DestAS);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(),
+ dl.getDebugLoc(),
+ VT, Ptr, SrcAS, DestAS);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
@@ -1561,7 +1630,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
case ISD::SETFALSE:
case ISD::SETFALSE2: return getConstant(0, VT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: return getConstant(1, VT);
+ case ISD::SETTRUE2: {
+ const TargetLowering *TLI = TM.getTargetLowering();
+ TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector());
+ return getConstant(
+ Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
+ }
case ISD::SETOEQ:
case ISD::SETOGT:
@@ -1643,7 +1717,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
}
} else {
// Ensure that the constant occurs on the RHS.
- return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+ ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+ MVT CompVT = N1.getValueType().getSimpleVT();
+ if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT))
+ return SDValue();
+
+ return getSetCC(dl, VT, N2, N1, SwappedCond);
}
}
@@ -1942,7 +2021,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InSignBit = APInt::getSignBit(InBits);
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
@@ -2054,7 +2132,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
// The low bits of the first operand are unchanged by the srem.
@@ -2150,7 +2227,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
}
case ISD::SIGN_EXTEND:
- Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ Tmp =
+ VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
@@ -2411,7 +2489,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), getVTList(VT));
+ SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), getVTList(VT));
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -2672,10 +2751,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Operand);
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, Operand);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Operand);
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, Operand);
}
AllNodes.push_back(N);
@@ -3073,9 +3154,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (VT.isSimple() && N1.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
"Extract subvector VTs must be a vectors!");
- assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ assert(VT.getVectorElementType() ==
+ N1.getValueType().getVectorElementType() &&
"Extract subvector VTs must have the same element type!");
- assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
"Extract subvector must be from larger vector to smaller vector!");
if (isa<ConstantSDNode>(Index.getNode())) {
@@ -3086,7 +3168,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Trivial extraction.
- if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
}
break;
@@ -3244,10 +3326,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2);
}
AllNodes.push_back(N);
@@ -3316,7 +3400,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
"Insert subvector VTs must be a vectors");
assert(VT == N1.getValueType() &&
"Dest and insert subvector source types must match!");
- assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
"Insert subvector must be from smaller vector to larger vector!");
if (isa<ConstantSDNode>(Index.getNode())) {
assert((N2.getValueType().getVectorNumElements() +
@@ -3326,7 +3410,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
// Trivial insertion.
- if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ if (VT.getSimpleVT() == N2.getSimpleValueType())
return N2;
}
break;
@@ -3349,10 +3433,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3);
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2, N3);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3);
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, N1, N2, N3);
}
AllNodes.push_back(N);
@@ -3771,7 +3857,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value, Store;
+ SDValue Value;
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, dl, DAG),
@@ -3787,7 +3873,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value, Store;
+ SDValue Store;
Store = DAG.getStore(Chain, dl, LoadValues[i],
getMemBasePlusOffset(Dst, DstOff, dl, DAG),
@@ -3800,6 +3886,24 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
&OutChains[0], OutChains.size());
}
+/// \brief Lower the call to 'memset' intrinsic function into a series of store
+/// operations.
+///
+/// \param DAG Selection DAG where lowered code is placed.
+/// \param dl Link to corresponding IR location.
+/// \param Chain Control flow dependency.
+/// \param Dst Pointer to destination memory location.
+/// \param Src Value of byte to write into the memory.
+/// \param Size Number of bytes to write.
+/// \param Align Alignment of the destination in bytes.
+/// \param isVol True if destination is volatile.
+/// \param DstPtrInfo IR information on the memory pointer.
+/// \returns New head in the control flow, if lowering was successful, empty
+/// SDValue otherwise.
+///
+/// The function tries to replace 'llvm.memset' intrinsic with several store
+/// operations and value calculation code. This is usually profitable for small
+/// memory size.
static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
@@ -4078,6 +4182,37 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
+ SDVTList VTList, SDValue* Ops, unsigned NumOps,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ // Allocate the operands array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+ // the node is deallocated, but recovered when the allocator is released.
+ // If the number of operands is less than 5, we use AtomicSDNode's internal
+ // storage.
+ SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) : 0;
+
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
+ dl.getDebugLoc(), VTList, MemVT,
+ Ops, DynOps, NumOps, MMO,
+ Ordering, SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Cmp,
SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment,
@@ -4117,22 +4252,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
EVT VT = Cmp.getValueType();
SDVTList VTs = getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain,
- Ptr, Cmp, Swp, MMO, Ordering,
- SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4190,22 +4311,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain,
- Ptr, Val, MMO,
- Ordering, SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4248,21 +4355,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
SDVTList VTs = getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain,
- Ptr, MMO, Ordering, SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope);
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
@@ -4339,12 +4433,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
return SDValue(E, 0);
}
- N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, NumOps,
- MemVT, MMO);
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
+ dl.getDebugLoc(), VTList, Ops,
+ NumOps, MemVT, MMO);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, NumOps,
- MemVT, MMO);
+ N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
+ dl.getDebugLoc(), VTList, Ops,
+ NumOps, MemVT, MMO);
}
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -4458,7 +4554,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
cast<LoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ExtType,
+ SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, AM, ExtType,
MemVT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -4478,6 +4575,14 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
TBAAInfo, Ranges);
}
+SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
+ SDValue Chain, SDValue Ptr,
+ MachineMemOperand *MMO) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ VT, MMO);
+}
+
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
@@ -4490,6 +4595,14 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
}
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
+ SDValue Chain, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ MemVT, MMO);
+}
+
SDValue
SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM) {
@@ -4548,8 +4661,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, ISD::UNINDEXED,
- false, VT, MMO);
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
+ dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, VT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -4616,8 +4730,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, ISD::UNINDEXED,
- true, SVT, MMO);
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
+ dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, SVT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -4640,7 +4755,8 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, AM,
ST->isTruncatingStore(),
ST->getMemoryVT(),
ST->getMemOperand());
@@ -4715,10 +4831,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops, NumOps);
+ N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ VTs, Ops, NumOps);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops, NumOps);
+ N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ VTs, Ops, NumOps);
}
AllNodes.push_back(N);
@@ -4781,26 +4899,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
return SDValue(E, 0);
if (NumOps == 1) {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]);
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0]);
} else if (NumOps == 2) {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1]);
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1]);
} else if (NumOps == 3) {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1],
- Ops[2]);
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1], Ops[2]);
} else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops, NumOps);
+ N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ VTList, Ops, NumOps);
}
CSEMap.InsertNode(N, IP);
} else {
if (NumOps == 1) {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]);
+ N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0]);
} else if (NumOps == 2) {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1]);
+ N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1]);
} else if (NumOps == 3) {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1],
- Ops[2]);
+ N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTList, Ops[0],
+ Ops[1], Ops[2]);
} else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops, NumOps);
+ N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ VTList, Ops, NumOps);
}
}
AllNodes.push_back(N);
@@ -4851,76 +4979,81 @@ SDVTList SelectionDAG::getVTList(EVT VT) {
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
- for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
- E = VTList.rend(); I != E; ++I)
- if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2)
- return *I;
-
- EVT *Array = Allocator.Allocate<EVT>(2);
- Array[0] = VT1;
- Array[1] = VT2;
- SDVTList Result = makeVTList(Array, 2);
- VTList.push_back(Result);
- return Result;
+ FoldingSetNodeID ID;
+ ID.AddInteger(2U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+
+ void *IP = 0;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (Result == NULL) {
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
- for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
- E = VTList.rend(); I != E; ++I)
- if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
- I->VTs[2] == VT3)
- return *I;
-
- EVT *Array = Allocator.Allocate<EVT>(3);
- Array[0] = VT1;
- Array[1] = VT2;
- Array[2] = VT3;
- SDVTList Result = makeVTList(Array, 3);
- VTList.push_back(Result);
- return Result;
+ FoldingSetNodeID ID;
+ ID.AddInteger(3U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+
+ void *IP = 0;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (Result == NULL) {
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
- for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
- E = VTList.rend(); I != E; ++I)
- if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 &&
- I->VTs[2] == VT3 && I->VTs[3] == VT4)
- return *I;
-
- EVT *Array = Allocator.Allocate<EVT>(4);
- Array[0] = VT1;
- Array[1] = VT2;
- Array[2] = VT3;
- Array[3] = VT4;
- SDVTList Result = makeVTList(Array, 4);
- VTList.push_back(Result);
- return Result;
+ FoldingSetNodeID ID;
+ ID.AddInteger(4U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+ ID.AddInteger(VT4.getRawBits());
+
+ void *IP = 0;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (Result == NULL) {
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
- switch (NumVTs) {
- case 0: llvm_unreachable("Cannot have nodes without results!");
- case 1: return getVTList(VTs[0]);
- case 2: return getVTList(VTs[0], VTs[1]);
- case 3: return getVTList(VTs[0], VTs[1], VTs[2]);
- case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]);
- default: break;
+ FoldingSetNodeID ID;
+ ID.AddInteger(NumVTs);
+ for (unsigned index = 0; index < NumVTs; index++) {
+ ID.AddInteger(VTs[index].getRawBits());
}
- for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(),
- E = VTList.rend(); I != E; ++I) {
- if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
- continue;
-
- if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2]))
- return *I;
+ void *IP = 0;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (Result == NULL) {
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ std::copy(VTs, VTs + NumVTs, Array);
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
+ VTListMap.InsertNode(Result, IP);
}
-
- EVT *Array = Allocator.Allocate<EVT>(NumVTs);
- std::copy(VTs, VTs+NumVTs, Array);
- SDVTList Result = makeVTList(Array, NumVTs);
- VTList.push_back(Result);
- return Result;
+ return Result->getSDVTList();
}
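
The rewritten getVTList overloads above stop scanning a growing std::vector<SDVTList> and instead intern each VT list in a FoldingSet keyed on the element count and raw type bits. A minimal, self-contained sketch of that uniquing pattern, assuming only llvm/ADT/FoldingSet.h; VTKeyNode and findOrCreate are illustrative names, not part of the patch:

#include "llvm/ADT/FoldingSet.h"
#include <vector>

namespace {
struct VTKeyNode : llvm::FoldingSetNode {
  std::vector<unsigned> RawVTs;                 // stand-in for EVT raw bits
  explicit VTKeyNode(const std::vector<unsigned> &V) : RawVTs(V) {}
  void Profile(llvm::FoldingSetNodeID &ID) const {
    ID.AddInteger(unsigned(RawVTs.size()));
    for (unsigned i = 0, e = RawVTs.size(); i != e; ++i)
      ID.AddInteger(RawVTs[i]);
  }
};
}

static llvm::FoldingSet<VTKeyNode> InternedVTs;

static VTKeyNode *findOrCreate(const std::vector<unsigned> &VTs) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(unsigned(VTs.size()));
  for (unsigned i = 0, e = VTs.size(); i != e; ++i)
    ID.AddInteger(VTs[i]);

  void *IP = 0;
  if (VTKeyNode *N = InternedVTs.FindNodeOrInsertPos(ID, IP))
    return N;                      // hit: no linear rescan of earlier lists
  VTKeyNode *N = new VTKeyNode(VTs);
  InternedVTs.InsertNode(N, IP);   // miss: allocate once and remember it
  return N;
}
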
@@ -5410,7 +5543,8 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
}
// Allocate a new MachineSDNode.
- N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs);
// Initialize the operands list.
if (NumOps > array_lengthof(N->LocalOperands))
@@ -5916,6 +6050,12 @@ GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
TheGlobal = GA;
}
+AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT,
+ SDValue X, unsigned SrcAS,
+ unsigned DestAS)
+ : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X),
+ SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
+
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
EVT memvt, MachineMemOperand *mmo)
: SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
@@ -6162,8 +6302,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
case ISD::ROTL:
case ISD::ROTR:
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
- getShiftAmountOperand(Operands[0].getValueType(),
- Operands[1])));
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
break;
case ISD::SIGN_EXTEND_INREG:
case ISD::FP_ROUND_INREG: {
@@ -6235,7 +6375,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
int64_t GVOffset = 0;
const TargetLowering *TLI = TM.getTargetLowering();
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- unsigned PtrWidth = TLI->getPointerTy().getSizeInBits();
+ unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
TLI->getDataLayout());
@@ -6268,6 +6408,38 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
return 0;
}
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split (or expanded) into two not necessarily identical pieces.
+std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
+ // Currently all types are split in half.
+ EVT LoVT, HiVT;
+ if (!VT.isVector()) {
+ LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT);
+ } else {
+ unsigned NumElements = VT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
+ NumElements/2);
+ }
+ return std::make_pair(LoVT, HiVT);
+}
+
+/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
+/// low/high part.
+std::pair<SDValue, SDValue>
+SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
+ const EVT &HiVT) {
+ assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <=
+ N.getValueType().getVectorNumElements() &&
+ "More vector elements requested than available!");
+ SDValue Lo, Hi;
+ Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
+ getConstant(0, TLI->getVectorIdxTy()));
+ Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
+ getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy()));
+ return std::make_pair(Lo, Hi);
+}
+
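
A hedged usage sketch of the two helpers just added, roughly how type legalization would split a wide vector value in half; splitInHalf is an illustrative wrapper, and the DAG, dl, and N arguments are assumed to come from the surrounding lowering code:

static std::pair<SDValue, SDValue> splitInHalf(SelectionDAG &DAG, SDLoc dl,
                                               SDValue N) {
  // Both halves currently get the same type, per GetSplitDestVTs above.
  std::pair<EVT, EVT> VTs = DAG.GetSplitDestVTs(N.getValueType());
  // Materialize the two EXTRACT_SUBVECTOR nodes for the low and high parts.
  return DAG.SplitVector(N, dl, VTs.first, VTs.second);
}
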
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
@@ -6389,7 +6561,7 @@ static void checkForCyclesHelper(const SDNode *N,
void llvm::checkForCycles(const llvm::SDNode *N) {
#ifdef XDEBUG
- assert(N && "Checking nonexistant SDNode");
+ assert(N && "Checking nonexistent SDNode");
SmallPtrSet<const SDNode*, 32> visited;
SmallPtrSet<const SDNode*, 32> checked;
checkForCyclesHelper(N, visited, checked);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b9f4381..2b2713d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/IntegersSubsetMapping.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -58,6 +58,7 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include <algorithm>
using namespace llvm;
@@ -1063,8 +1064,10 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
- if (isa<ConstantPointerNull>(C))
- return DAG.getConstant(0, TLI->getPointerTy());
+ if (isa<ConstantPointerNull>(C)) {
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return DAG.getConstant(0, TLI->getPointerTy(AS));
+ }
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, VT);
@@ -1268,7 +1271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
- /*isfixed=*/true, 0, 0));
+ VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
@@ -1617,8 +1620,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
} else
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
} else {
- assert(CB.CC == ISD::SETCC_INVALID &&
- "Condition is undefined for to-the-range belonging check.");
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
@@ -1626,9 +1628,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
- if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
- ISD::SETULE);
+ ISD::SETLE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
VT, CmpOp, DAG.getConstant(Low, VT));
@@ -1741,6 +1743,77 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
DAG.setRoot(BrCond);
}
+/// Codegen a new tail for a stack protector check ParentMBB which has had its
+/// tail spliced into a stack protector check success bb.
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+
+ // First create the loads to the guard/stack slot for the comparison.
+ const TargetLowering *TLI = TM.getTargetLowering();
+ EVT PtrTy = TLI->getPointerTy();
+
+ MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI->getStackProtectorIndex();
+
+ const Value *IRGuard = SPD.getGuard();
+ SDValue GuardPtr = getValue(IRGuard);
+ SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
+
+ unsigned Align =
+ TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
+ SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
+ GuardPtr, MachinePointerInfo(IRGuard, 0),
+ true, false, false, Align);
+
+ SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
+ StackSlotPtr,
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, false, Align);
+
+ // Perform the comparison via a subtract/getsetcc.
+ EVT VT = Guard.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot);
+
+ SDValue Cmp = DAG.getSetCC(getCurSDLoc(),
+ TLI->getSetCCResultType(*DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(0, VT),
+ ISD::SETNE);
+
+ // If the sub is not 0, then we know the guard/stackslot do not equal, so
+ // branch to failure MBB.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
+ MVT::Other, StackSlot.getOperand(0),
+ Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
+ // Otherwise branch to success MBB.
+ SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(),
+ MVT::Other, BrCond,
+ DAG.getBasicBlock(SPD.getSuccessMBB()));
+
+ DAG.setRoot(Br);
+}
+
+/// Codegen the failure basic block for a stack protector check.
+///
+/// A failure stack protector machine basic block consists simply of a call to
+/// __stack_chk_fail().
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void
+SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
+ const TargetLowering *TLI = TM.getTargetLowering();
+ SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL,
+ MVT::isVoid, 0, 0, false, getCurSDLoc(),
+ false, false).second;
+ DAG.setRoot(Chain);
+}
+
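
A plain C++ model, not taken from the patch, of what the two functions above emit: the parent block's new tail loads the guard and the stack slot, subtracts them, and branches to a failure block whose only job is to call __stack_chk_fail(). The helper name and the unsigned long guard word are assumptions for illustration:

extern "C" void __stack_chk_fail(void);

static void stackProtectorCheckModel(const unsigned long *GuardPtr,
                                     const unsigned long *StackSlotPtr) {
  unsigned long Guard = *GuardPtr;      // load of the guard variable
  unsigned long Slot  = *StackSlotPtr;  // load of the protector stack slot
  if (Guard - Slot != 0)                // ISD::SUB feeding SETNE + BRCOND
    __stack_chk_fail();                 // failure MBB; never returns
  // Fall through: success MBB holding the original block's spliced tail.
}
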
/// visitBitTestHeader - This function emits necessary code to produce value
/// suitable for "bit tests"
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
@@ -2073,7 +2146,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
CC = ISD::SETEQ;
LHS = SV; RHS = I->High; MHS = NULL;
} else {
- CC = ISD::SETCC_INVALID;
+ CC = ISD::SETLE;
LHS = I->Low; MHS = SV; RHS = I->High;
}
@@ -2107,7 +2180,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) {
static APInt ComputeRange(const APInt &First, const APInt &Last) {
uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
+ APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
return (LastExt - FirstExt + 1ULL);
}
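
A small worked example of why ComputeRange now sign-extends: with First = -2 and Last = 3 the signed range is the dense value 6, whereas zero-extension would have treated -2 as 254 and produced a wrapped, oversized range. Plain fixed-width integers stand in for APInt below:

#include <cassert>

static long computeRangeModel(signed char First, signed char Last) {
  long FirstExt = First;               // models First.sext(BitWidth)
  long LastExt = Last;                 // models Last.sext(BitWidth)
  return LastExt - FirstExt + 1;
}

static void computeRangeExample() {
  assert(computeRangeModel(-2, 3) == 6);          // signed: dense, small range
  assert((unsigned char)(signed char)-2 == 254);  // what zext would compare
}
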
@@ -2174,7 +2247,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
const APInt &High = cast<ConstantInt>(I->High)->getValue();
- if (Low.ule(TEI) && TEI.ule(High)) {
+ if (Low.sle(TEI) && TEI.sle(High)) {
DestBBs.push_back(I->BB);
if (TEI==High)
++I;
@@ -2348,7 +2421,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// Create a CaseBlock record representing a conditional branch to
// the LHS node if the value being switched on SV is less than C.
// Otherwise, branch to LHS.
- CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
if (CR.CaseBB == SwitchBB)
visitSwitchCase(CB, SwitchBB);
@@ -2378,7 +2451,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
MachineFunction *CurMF = FuncInfo.MF;
// If target does not have legal shift left, do not emit bit tests at all.
- if (!TLI->isOperationLegal(ISD::SHL, TLI->getPointerTy()))
+ if (!TLI->isOperationLegal(ISD::SHL, PTy))
return false;
size_t numCmps = 0;
@@ -2421,7 +2494,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
// Optimize the case where all the case values fit in a
// word without having to subtract minValue. In this case,
// we can optimize away the subtraction.
- if (maxValue.ult(IntPtrBits)) {
+ if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
cmpRange = maxValue;
} else {
lowBound = minValue;
@@ -2496,12 +2569,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
/// Clusterify - Transform simple list of Cases into list of CaseRange's
size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const SwitchInst& SI) {
-
- /// Use a shorter form of declaration, and also
- /// show the we want to use CRSBuilder as Clusterifier.
- typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
-
- Clusterifier TheClusterifier;
+ size_t numCmps = 0;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
@@ -2510,27 +2578,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
- TheClusterifier.add(i.getCaseValueEx(), SMBB,
- BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
- }
-
- TheClusterifier.optimize();
-
- size_t numCmps = 0;
- for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
- e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
- Clusterifier::Cluster &C = *i;
- // Update edge weight for the cluster.
- unsigned W = C.first.Weight;
-
- // FIXME: Currently work with ConstantInt based numbers.
- // Changing it to APInt based is a pretty heavy for this commit.
- Cases.push_back(Case(C.first.getLow().toConstantInt(),
- C.first.getHigh().toConstantInt(), C.second, W));
+ uint32_t ExtraWeight =
+ BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;
+
+ Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
+ SMBB, ExtraWeight));
+ }
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge cases into clusters
+ if (Cases.size() >= 2)
+ // Must recompute end() each iteration because it may be
+ // invalidated by erase if we hold on to it
+ for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ J != Cases.end(); ) {
+ const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+ const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+ MachineBasicBlock* nextBB = J->BB;
+ MachineBasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ I->ExtraWeight += J->ExtraWeight;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
- if (C.first.getLow() != C.first.getHigh())
- // A range counts double, since it requires two compares.
- ++numCmps;
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
+ // A range counts double, since it requires two compares.
+ ++numCmps;
}
return numCmps;
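
A self-contained model of the merge loop above, using plain integers in place of APInt and MachineBasicBlock: adjacent case values that differ by one and branch to the same destination collapse into a single [Low, High] range whose weights are summed. Cases are assumed already sorted by Low, as after the std::sort call:

#include <vector>

struct SimpleCase { long Low, High; int Dest; unsigned Weight; };

static void mergeAdjacentCases(std::vector<SimpleCase> &Cases) {
  if (Cases.size() < 2)
    return;
  for (size_t I = 0, J = 1; J < Cases.size(); ) {
    if (Cases[J].Low - Cases[I].High == 1 && Cases[I].Dest == Cases[J].Dest) {
      Cases[I].High = Cases[J].High;           // widen the current range
      Cases[I].Weight += Cases[J].Weight;      // keep the profile weight
      Cases.erase(Cases.begin() + J);          // J now indexes the next case
    } else {
      I = J++;                                 // start a new cluster
    }
  }
}
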
@@ -2859,6 +2940,21 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
setValue(&I, N); // noop cast.
}
+void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ SDValue N = getValue(SV);
+ EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+
+ unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+ unsigned DestAS = I.getType()->getPointerAddressSpace();
+
+ if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+
+ setValue(&I, N);
+}
+
void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
@@ -3151,10 +3247,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
}
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
- SDValue N = getValue(I.getOperand(0));
+ Value *Op0 = I.getOperand(0);
// Note that the pointer operand may be a vector of pointers. Take the scalar
// element which holds a pointer.
- Type *Ty = I.getOperand(0)->getType()->getScalarType();
+ Type *Ty = Op0->getType()->getScalarType();
+ unsigned AS = Ty->getPointerAddressSpace();
+ SDValue N = getValue(Op0);
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
@@ -3179,14 +3277,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
uint64_t Offs =
TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
- EVT PTy = TLI->getPointerTy();
+ EVT PTy = TLI->getPointerTy(AS);
unsigned PtrBits = PTy.getSizeInBits();
if (PtrBits < 64)
- OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(),
- TLI->getPointerTy(),
+ OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
DAG.getConstant(Offs, MVT::i64));
else
- OffsVal = DAG.getIntPtrConstant(Offs);
+ OffsVal = DAG.getConstant(Offs, PTy);
N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
OffsVal);
@@ -3194,7 +3291,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
// N = N + Idx * ElementSize;
- APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(),
+ APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
TD->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
@@ -3451,7 +3548,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDValue L =
DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
- getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
+ getValue(I.getCompareOperand()).getSimpleValueType(),
InChain,
getValue(I.getPointerOperand()),
getValue(I.getCompareOperand()),
@@ -3499,7 +3596,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDValue L =
DAG.getAtomic(NT, dl,
- getValue(I.getValOperand()).getValueType().getSimpleVT(),
+ getValue(I.getValOperand()).getSimpleValueType(),
InChain,
getValue(I.getPointerOperand()),
getValue(I.getValOperand()),
@@ -4193,7 +4290,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const TargetLowering &TLI) {
bool IsExp10 = false;
- if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 &&
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
APFloat Ten(10.0f);
@@ -4705,14 +4802,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
TLI->getPointerTy());
SDValue Offset = DAG.getNode(ISD::ADD, sdl,
- TLI->getPointerTy(),
+ CfaArg.getValueType(),
DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
- TLI->getPointerTy()),
+ CfaArg.getValueType()),
CfaArg);
SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl,
TLI->getPointerTy(),
DAG.getConstant(0, TLI->getPointerTy()));
- setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI->getPointerTy(),
+ setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
FA, Offset));
return 0;
}
@@ -4902,7 +4999,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
- case Intrinsic::nearbyint: {
+ case Intrinsic::nearbyint:
+ case Intrinsic::round: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -4915,6 +5013,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
}
setValue(&I, DAG.getNode(Opcode, sdl,
@@ -4922,6 +5021,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return 0;
}
+ case Intrinsic::copysign:
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return 0;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5207,9 +5312,30 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::invariant_end:
// Discard region information.
return 0;
+ case Intrinsic::stackprotectorcheck: {
+ // Do not actually emit anything for this basic block. Instead we initialize
+ // the stack protector descriptor and export the guard variable so we can
+ // access it in FinishBasicBlock.
+ const BasicBlock *BB = I.getParent();
+ SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I);
+ ExportFromCurrentBlock(SPDescriptor.getGuard());
+
+ // Flush our exports since we are going to process a terminator.
+ (void)getControlRoot();
+ return 0;
+ }
case Intrinsic::donothing:
// ignore
return 0;
+ case Intrinsic::experimental_stackmap: {
+ visitStackmap(I);
+ return 0;
+ }
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64: {
+ visitPatchpoint(I);
+ return 0;
+ }
}
}
@@ -5274,15 +5400,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- unsigned attrInd = i - CS.arg_begin() + 1;
- Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
- Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
- Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
- Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
- Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
- Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
- Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned);
- Entry.Alignment = CS.getParamAlignment(attrInd);
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);
}
@@ -5364,8 +5483,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
}
if (!Result.second.getNode()) {
- // As a special case, a null chain means that a tail call has been emitted and
- // the DAG root is already updated.
+ // As a special case, a null chain means that a tail call has been emitted
+ // and the DAG root is already updated.
HasTailCall = true;
// Since there's no actual continuation from this block, nothing can be
@@ -5445,6 +5564,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
return LoadVal;
}
+/// processIntegerCallValue - Record the value for an instruction that
+/// produces an integer result, converting the type where necessary.
+void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
+ SDValue Value,
+ bool IsSigned) {
+ EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true);
+ if (IsSigned)
+ Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
+ else
+ Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
+ setValue(&I, Value);
+}
/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
/// If so, return true and lower it, otherwise return false and it will be
@@ -5460,15 +5591,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
!I.getType()->isIntegerTy())
return false;
- const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
+ const Value *Size = I.getArgOperand(2);
+ const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
+ if (CSize && CSize->getZExtValue() == 0) {
+ EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true);
+ setValue(&I, DAG.getConstant(0, CallVT));
+ return true;
+ }
+
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(LHS), getValue(RHS), getValue(Size),
+ MachinePointerInfo(LHS),
+ MachinePointerInfo(RHS));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
- if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
bool ActuallyDoIt = true;
MVT LoadVT;
Type *LoadTy;
- switch (Size->getZExtValue()) {
+ switch (CSize->getZExtValue()) {
default:
LoadVT = MVT::Other;
LoadTy = 0;
@@ -5476,20 +5625,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
break;
case 2:
LoadVT = MVT::i16;
- LoadTy = Type::getInt16Ty(Size->getContext());
+ LoadTy = Type::getInt16Ty(CSize->getContext());
break;
case 4:
LoadVT = MVT::i32;
- LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = Type::getInt32Ty(CSize->getContext());
break;
case 8:
LoadVT = MVT::i64;
- LoadTy = Type::getInt64Ty(Size->getContext());
+ LoadTy = Type::getInt64Ty(CSize->getContext());
break;
/*
case 16:
LoadVT = MVT::v4i32;
- LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = Type::getInt32Ty(CSize->getContext());
LoadTy = VectorType::get(LoadTy, 4);
break;
*/
@@ -5503,7 +5652,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// supports unaligned loads of that type. Expanding into byte loads would
// bloat the code.
const TargetLowering *TLI = TM.getTargetLowering();
- if (ActuallyDoIt && Size->getZExtValue() > 4) {
+ if (ActuallyDoIt && CSize->getZExtValue() > 4) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT))
@@ -5516,8 +5665,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
ISD::SETNE);
- EVT CallVT = TLI->getValueType(I.getType(), true);
- setValue(&I, DAG.getZExtOrTrunc(Res, getCurSDLoc(), CallVT));
+ processIntegerCallValue(I, Res, false);
return true;
}
}
@@ -5526,6 +5674,148 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
return false;
}
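
A hand-written model, not from the patch, of the constant-size fast path in visitMemCmpCall: when the length is a small known constant and the result only feeds an ==/!= 0 test, one fixed-width load per operand plus a single compare replaces the libc call. The function name is illustrative, and memcpy stands in for the possibly unaligned i32 load the DAG emits:

#include <cstring>

typedef unsigned int u32;                      // stands in for MVT::i32

static bool memcmp4IsNonZero(const void *LHS, const void *RHS) {
  u32 A, B;
  std::memcpy(&A, LHS, sizeof A);              // load *(i32*)LHS
  std::memcpy(&B, RHS, sizeof B);              // load *(i32*)RHS
  return A != B;                               // the SETNE the builder emits
}
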
+/// visitMemChrCall -- See if we can lower a memchr call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
+ // Verify that the prototype makes sense. void *memchr(void *, int, size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *Src = I.getArgOperand(0);
+ const Value *Char = I.getArgOperand(1);
+ const Value *Length = I.getArgOperand(2);
+ if (!Src->getType()->isPointerTy() ||
+ !Char->getType()->isIntegerTy() ||
+ !Length->getType()->isIntegerTy() ||
+ !I.getType()->isPointerTy())
+ return false;
+
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Src), getValue(Char), getValue(Length),
+ MachinePointerInfo(Src));
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
+/// optimized form. If so, return true and lower it, otherwise return false
+/// and it will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
+ // Verify that the prototype makes sense. char *strcpy(char *, char *)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isPointerTy() ||
+ !I.getType()->isPointerTy())
+ return false;
+
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1), isStpcpy);
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ DAG.setRoot(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
+ // Verify that the prototype makes sense. int strcmp(void*,void*)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isPointerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrLenCall -- See if we can lower a strlen call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
+ // Verify that the prototype makes sense. size_t strlen(char *)
+ if (I.getNumArgOperands() != 1)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0);
+ if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
+ return false;
+
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
+ // Verify that the prototype makes sense. size_t strnlen(char *, size_t)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
/// visitUnaryFloatCall - If a call instruction is a unary floating-point
/// operation (as expected), translate it to an SDNode with the specified opcode
/// and return true.
@@ -5644,6 +5934,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ if (visitUnaryFloatCall(I, ISD::FROUND))
+ return;
+ break;
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
@@ -5666,6 +5962,30 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitMemCmpCall(I))
return;
break;
+ case LibFunc::memchr:
+ if (visitMemChrCall(I))
+ return;
+ break;
+ case LibFunc::strcpy:
+ if (visitStrCpyCall(I, false))
+ return;
+ break;
+ case LibFunc::stpcpy:
+ if (visitStrCpyCall(I, true))
+ return;
+ break;
+ case LibFunc::strcmp:
+ if (visitStrCmpCall(I))
+ return;
+ break;
+ case LibFunc::strlen:
+ if (visitStrLenCall(I))
+ return;
+ break;
+ case LibFunc::strnlen:
+ if (visitStrNLenCall(I))
+ return;
+ break;
}
}
}
@@ -6421,6 +6741,248 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.getSrcValue(I.getArgOperand(1))));
}
+/// \brief Lower an argument list according to the target calling convention.
+///
+/// \return A tuple of <return-value, token-chain>
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
+ unsigned NumArgs, SDValue Callee,
+ bool useVoidTy) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ ImmutableCallSite CS(&CI);
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ ArgI != ArgE; ++ArgI) {
+ const Value *V = CI.getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(V);
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, AttrI);
+ Args.push_back(Entry);
+ }
+
+ Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
+ TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false,
+ /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs,
+ CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false,
+ /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc());
+
+ const TargetLowering *TLI = TM.getTargetLowering();
+ return TLI->LowerCallTo(CLI);
+}
+
+/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+ // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+
+ assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+ SDValue Callee = getValue(CI.getCalledValue());
+
+ // Lower into a call sequence with no args and no return value.
+ std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee);
+ // Set the root to the target-lowered call chain.
+ SDValue Chain = Result.second;
+ DAG.setRoot(Chain);
+
+ /// Get a call instruction from the call sequence chain.
+ /// Tail calls are not allowed.
+ SDNode *CallEnd = Chain.getNode();
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ SDNode *Call = CallEnd->getOperand(0).getNode();
+ bool hasGlue = Call->getGluedNode();
+
+ // Replace the target specific call node with the stackmap intrinsic.
+ SmallVector<SDValue, 8> Ops;
+
+ // Add the <id> and <numShadowBytes> constants.
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue tmp = getValue(CI.getOperand(i));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
+ }
+ // Push live variables for the stack map.
+ for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i)
+ Ops.push_back(getValue(CI.getArgOperand(i)));
+
+ // Push the chain (this is originally the first operand of the call, but
+ // now becomes the last or second-to-last operand).
+ Ops.push_back(*(Call->op_begin()));
+
+ // Push the glue flag (last operand).
+ if (hasGlue)
+ Ops.push_back(*(Call->op_end()-1));
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ // Replace the target specific call node with a STACKMAP node.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(),
+ NodeTys, Ops);
+
+ // StackMap generates no value, so nothing goes in the NodeMap.
+
+ // Fixup the consumers of the intrinsic. The chain and glue may be used in the
+ // call sequence.
+ DAG.ReplaceAllUsesWith(Call, MN);
+
+ DAG.DeleteNode(Call);
+}
+
+/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+
+ CallingConv::ID CC = CI.getCallingConv();
+ bool isAnyRegCC = CC == CallingConv::AnyReg;
+ bool hasDef = !CI.getType()->isVoidTy();
+ SDValue Callee = getValue(CI.getOperand(2)); // <target>
+
+ // Get the real number of arguments participating in the call <numArgs>
+ unsigned NumArgs =
+ cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ assert(CI.getNumArgOperands() >= NumArgs + 4 &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
+ std::pair<SDValue, SDValue> Result =
+ LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC);
+
+ // Set the root to the target-lowered call chain.
+ SDValue Chain = Result.second;
+ DAG.setRoot(Chain);
+
+ SDNode *CallEnd = Chain.getNode();
+ if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+ CallEnd = CallEnd->getOperand(0).getNode();
+
+ /// Get a call instruction from the call sequence chain.
+ /// Tail calls are not allowed.
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ SDNode *Call = CallEnd->getOperand(0).getNode();
+ bool hasGlue = Call->getGluedNode();
+
+ // Replace the target specific call node with the patchable intrinsic.
+ SmallVector<SDValue, 8> Ops;
+
+ // Add the <id> and <numNopBytes> constants.
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue tmp = getValue(CI.getOperand(i));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
+ }
+ // Assume that the Callee is a constant address.
+ Ops.push_back(
+ DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
+ /*isTarget=*/true));
+
+ // Adjust <numArgs> to account for any arguments that have been passed on the
+ // stack instead.
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
+ NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs;
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+
+ // Add the calling convention
+ Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (isAnyRegCC)
+ for (unsigned i = 4, e = NumArgs + 4; i != e; ++i)
+ Ops.push_back(getValue(CI.getArgOperand(i)));
+
+ // Push the arguments from the call instruction.
+ SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
+ for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
+ Ops.push_back(*i);
+
+ // Push live variables for the stack map.
+ for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) {
+ SDValue OpVal = getValue(CI.getArgOperand(i));
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+ Ops.push_back(
+ DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+ Ops.push_back(
+ DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+ } else
+ Ops.push_back(OpVal);
+ }
+
+ // Push the register mask info.
+ if (hasGlue)
+ Ops.push_back(*(Call->op_end()-2));
+ else
+ Ops.push_back(*(Call->op_end()-1));
+
+ // Push the chain (this is originally the first operand of the call, but
+ // now becomes the last or second-to-last operand).
+ Ops.push_back(*(Call->op_begin()));
+
+ // Push the glue flag (last operand).
+ if (hasGlue)
+ Ops.push_back(*(Call->op_end()-1));
+
+ SDVTList NodeTys;
+ if (isAnyRegCC && hasDef) {
+ // Create the return types based on the intrinsic definition
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 3> ValueVTs;
+ ComputeValueVTs(TLI, CI.getType(), ValueVTs);
+ assert(ValueVTs.size() == 1 && "Expected only one return value type.");
+
+ // There is always a chain and a glue type at the end
+ ValueVTs.push_back(MVT::Other);
+ ValueVTs.push_back(MVT::Glue);
+ NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+ } else
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ // Replace the target specific call node with a PATCHPOINT node.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
+ getCurSDLoc(), NodeTys, Ops);
+
+ // Update the NodeMap.
+ if (hasDef) {
+ if (isAnyRegCC)
+ setValue(&CI, SDValue(MN, 0));
+ else
+ setValue(&CI, Result.first);
+ }
+
+ // Fixup the consumers of the intrinsic. The chain and glue may be used in the
+ // call sequence. Furthermore the location of the chain and glue can change
+ // when the AnyReg calling convention is used and the intrinsic returns a
+ // value.
+ if (isAnyRegCC && hasDef) {
+ SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
+ SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ } else
+ DAG.ReplaceAllUsesWith(Call, MN);
+ DAG.DeleteNode(Call);
+}
+
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
@@ -6438,6 +7000,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
@@ -6527,7 +7090,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
i < CLI.NumFixedArgs,
i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
@@ -6666,7 +7229,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
Ins.push_back(RetArg);
}
@@ -6677,6 +7240,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, I->getType(), ValueVTs);
bool isArgValueUsed = !I->use_empty();
+ unsigned PartBase = 0;
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
@@ -6714,8 +7278,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
- Idx-1, i*RegisterVT.getStoreSize());
+ ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
+ Idx-1, PartBase+i*RegisterVT.getStoreSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
@@ -6723,6 +7287,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setOrigAlign(1);
Ins.push_back(MyFlags);
}
+ PartBase += VT.getStoreSize();
}
}
@@ -6940,3 +7505,22 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
ConstantsOut.clear();
}
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB
+/// is 0.
+MachineBasicBlock *
+SelectionDAGBuilder::StackProtectorDescriptor::
+AddSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ MachineBasicBlock *SuccMBB) {
+ // If SuccBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI = ParentMBB;
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(SuccMBB);
+ return SuccMBB;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ef73c00..835f643 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- c++ -*---===//
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===//
//
// The LLVM Compiler Infrastructure
//
@@ -26,6 +26,7 @@
namespace llvm {
+class AddrSpaceCastInst;
class AliasAnalysis;
class AllocaInst;
class BasicBlock;
@@ -84,7 +85,7 @@ class SelectionDAGBuilder {
const Instruction *CurInst;
DenseMap<const Value*, SDValue> NodeMap;
-
+
/// UnusedArgNodeMap - Maps argument value for unused arguments. This is used
/// to preserve debug information for incoming arguments.
DenseMap<const Value*, SDValue> UnusedArgNodeMap;
@@ -182,6 +183,17 @@ private:
typedef std::vector<CaseRec> CaseRecVector;
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator()(const Case &C1, const Case &C2) {
+ assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
+
struct CaseBitsCmp {
bool operator()(const CaseBits &C1, const CaseBits &C2) {
return C1.Bits > C2.Bits;
@@ -224,7 +236,7 @@ private:
struct JumpTable {
JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
-
+
/// Reg - the virtual register containing the index of the jump table entry
/// to jump to.
unsigned Reg;
@@ -278,6 +290,201 @@ private:
BitTestInfo Cases;
};
+ /// A class which encapsulates all of the information needed to generate a
+ /// stack protector check and signals to isel via its state being initialized
+ /// that a stack protector needs to be generated.
+ ///
+ /// *NOTE* The following is a high level documentation of SelectionDAG Stack
+ /// Protector Generation. The reason that it is placed here is for a lack of
+ /// other good places to stick it.
+ ///
+ /// High Level Overview of SelectionDAG Stack Protector Generation:
+ ///
+ /// Previously, generation of stack protectors was done exclusively in the
+ /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated
+ /// splitting basic blocks at the IR level to create the success/failure basic
+ /// blocks in the tail of the basic block in question. As a result of this,
+ /// calls that would have qualified for the sibling call optimization were no
+ /// longer eligible for optimization since said calls were no longer right in
+ /// the "tail position" (i.e. the immediate predecessor of a ReturnInst
+ /// instruction).
+ ///
+ /// Then it was noticed that since the sibling call optimization causes the
+ /// callee to reuse the caller's stack, if we could delay the generation of
+ /// the stack protector check until later in CodeGen after the sibling call
+ /// decision was made, we get both the tail call optimization and the stack
+ /// protector check!
+ ///
+ /// A few goals in solving this problem were:
+ ///
+ /// 1. Preserve the architecture independence of stack protector generation.
+ ///
+ /// 2. Preserve the normal IR level stack protector check for platforms like
+ /// OpenBSD for which we support platform specific stack protector
+ /// generation.
+ ///
+ /// The main problem that guided the present solution is that one can not
+ /// solve this problem in an architecture independent manner at the IR level
+ /// only. This is because:
+ ///
+ /// 1. The decision on whether or not to perform a sibling call on certain
+ /// platforms (for instance i386) requires lower level information
+ /// related to available registers that can not be known at the IR level.
+ ///
+ /// 2. Even if the previous point were not true, the decision on whether to
+ /// perform a tail call is done in LowerCallTo in SelectionDAG which
+ /// occurs after the Stack Protector Pass. As a result, one would need to
+ /// put the relevant callinst into the stack protector check success
+ /// basic block (where the return inst is placed) and then move it back
+ /// later at SelectionDAG/MI time before the stack protector check if the
+ /// tail call optimization failed. The MI level option was nixed
+ /// immediately since it would require platform specific pattern
+ /// matching. The SelectionDAG level option was nixed because
+ /// SelectionDAG only processes one IR level basic block at a time
+ /// implying one could not create a DAG Combine to move the callinst.
+ ///
+ /// To get around this problem a few things were realized:
+ ///
+ /// 1. While one can not handle multiple IR level basic blocks at the
+ /// SelectionDAG Level, one can generate multiple machine basic blocks
+ /// for one IR level basic block. This is how we handle bit tests and
+ /// switches.
+ ///
+ /// 2. At the MI level, tail calls are represented via a special return
+ /// MIInst called "tcreturn". Thus if we know the basic block in which we
+ /// wish to insert the stack protector check, we get the correct behavior
+ /// by always inserting the stack protector check right before the return
+ /// statement. This is a "magical transformation" since no matter where
+ /// the stack protector check intrinsic is, we always insert the stack
+ /// protector check code at the end of the BB.
+ ///
+ /// Given the aforementioned constraints, the following solution was devised:
+ ///
+ /// 1. On platforms that do not support SelectionDAG stack protector check
+ /// generation, allow for the normal IR level stack protector check
+ /// generation to continue.
+ ///
+ /// 2. On platforms that do support SelectionDAG stack protector check
+ /// generation:
+ ///
+ /// a. Use the IR level stack protector pass to decide if a stack
+ /// protector is required/which BB we insert the stack protector check
+ /// in by reusing the logic already therein. If we wish to generate a
+ /// stack protector check in a basic block, we place a special IR
+ /// intrinsic called llvm.stackprotectorcheck right before the BB's
+ /// returninst or if there is a callinst that could potentially be
+ /// sibling call optimized, before the call inst.
+ ///
+ /// b. Then when a BB with said intrinsic is processed, we codegen the BB
+ /// normally via SelectBasicBlock. In said process, when we visit the
+ /// stack protector check, we do not actually emit anything into the
+ /// BB. Instead, we just initialize the stack protector descriptor
+ /// class (which involves stashing information/creating the success
+ /// mbb and the failure mbb if we have not created one for this
+ /// function yet) and export the guard variable that we are going to
+ /// compare.
+ ///
+ /// c. After we finish selecting the basic block, in FinishBasicBlock if
+ /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
+ /// initialized, we first find a splice point in the parent basic block
+ /// before the terminator and then splice the terminator of said basic
+ /// block into the success basic block. Then we code-gen a new tail for
+ /// the parent basic block consisting of the two loads, the comparison,
+ /// and finally two branches to the success/failure basic blocks. We
+ /// conclude by code-gening the failure basic block if we have not
+ /// code-gened it already (all stack protector checks we generate in
+ /// the same function use the same failure basic block).
+ class StackProtectorDescriptor {
+ public:
+ StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0),
+ Guard(0) { }
+ ~StackProtectorDescriptor() { }
+
+ /// Returns true if all fields of the stack protector descriptor are
+ /// initialized implying that we should/are ready to emit a stack protector.
+ bool shouldEmitStackProtector() const {
+ return ParentMBB && SuccessMBB && FailureMBB && Guard;
+ }
+
+ /// Initialize the stack protector descriptor structure for a new basic
+ /// block.
+ void initialize(const BasicBlock *BB,
+ MachineBasicBlock *MBB,
+ const CallInst &StackProtCheckCall) {
+ // Make sure we are not initialized yet.
+ assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
+ "already initialized!");
+ ParentMBB = MBB;
+ SuccessMBB = AddSuccessorMBB(BB, MBB);
+ FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB);
+ if (!Guard)
+ Guard = StackProtCheckCall.getArgOperand(0);
+ }
+
+ /// Reset state that changes when we handle different basic blocks.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. The specific basic block we are generating a
+ /// stack protector for (ParentMBB).
+ ///
+ /// 2. The successor machine basic block that will contain the tail of
+ /// parent mbb after we create the stack protector check (SuccessMBB). This
+ /// BB is visited only on stack protector check success.
+ void resetPerBBState() {
+ ParentMBB = 0;
+ SuccessMBB = 0;
+ }
+
+ /// Reset state that only changes when we switch functions.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. FailureMBB since we reuse the failure code path for all stack
+ /// protector checks created in an individual function.
+ ///
+ /// 2. The guard variable since the guard variable we are checking against is
+ /// always the same.
+ void resetPerFunctionState() {
+ FailureMBB = 0;
+ Guard = 0;
+ }
+
+ MachineBasicBlock *getParentMBB() { return ParentMBB; }
+ MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
+ MachineBasicBlock *getFailureMBB() { return FailureMBB; }
+ const Value *getGuard() { return Guard; }
+
+ private:
+ /// The basic block for which we are generating the stack protector.
+ ///
+ /// As a result of stack protector generation, we will splice the
+ /// terminators of this basic block into the successor mbb SuccessMBB and
+ /// replace it with a compare/branch to the successor mbbs
+ /// SuccessMBB/FailureMBB depending on whether or not the stack protector
+ /// was violated.
+ MachineBasicBlock *ParentMBB;
+
+ /// A basic block visited on stack protector check success that contains the
+ /// terminators of ParentMBB.
+ MachineBasicBlock *SuccessMBB;
+
+ /// A basic block visited on stack protector check failure that will
+ /// contain a call to __stack_chk_fail().
+ MachineBasicBlock *FailureMBB;
+
+ /// The guard variable which we will compare against the stored value in the
+ /// stack protector stack slot.
+ const Value *Guard;
+
+ /// Add a successor machine basic block to ParentMBB. If the successor mbb
+ /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
+ /// block will be created.
+ MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ MachineBasicBlock *SuccMBB = 0);
+ };
+
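
For reviewers who want the lifecycle above in a compact form, here is a minimal standalone sketch; it is not part of this patch and not LLVM code (ToySPDescriptor and the plain int pointers standing in for the MBBs and the guard value are invented for illustration). It only demonstrates the ready/per-BB/per-function state transitions documented above:

    #include <cassert>

    struct ToySPDescriptor {
      int *Parent, *Success, *Failure, *Guard;
      bool shouldEmit() const { return Parent && Success && Failure && Guard; }
      void resetPerBB()       { Parent = nullptr; Success = nullptr; }
      void resetPerFunction() { Failure = nullptr; Guard = nullptr; }
    };

    int main() {
      int P1, S1, P2, S2, F, G;
      ToySPDescriptor D = {nullptr, nullptr, nullptr, nullptr};

      // First protected block in a function: all four pieces are set up.
      D = {&P1, &S1, &F, &G};
      assert(D.shouldEmit());

      D.resetPerBB();                              // after emitting the check
      assert(!D.shouldEmit());
      assert(D.Failure == &F && D.Guard == &G);    // failure block and guard persist

      // A later protected block in the same function reuses them.
      D.Parent = &P2;
      D.Success = &S2;
      assert(D.shouldEmit() && D.Failure == &F);

      D.resetPerBB();
      D.resetPerFunction();                        // only between functions
      assert(!D.Failure && !D.Guard);
      return 0;
    }
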
private:
const TargetMachine &TM;
public:
@@ -295,6 +502,9 @@ public:
/// BitTestCases - Vector of BitTestBlock structures used to communicate
/// SwitchInst code generation information.
std::vector<BitTestBlock> BitTestCases;
+ /// A StackProtectorDescriptor structure used to communicate stack protector
+ /// information in between SelectBasicBlock and FinishBasicBlock.
+ StackProtectorDescriptor SPDescriptor;
// Emit PHI-node-operand constants only once even if used by multiple
// PHI nodes.
@@ -305,9 +515,9 @@ public:
FunctionLoweringInfo &FuncInfo;
/// OptLevel - What optimization level we're generating code for.
- ///
+ ///
CodeGenOpt::Level OptLevel;
-
+
/// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI;
@@ -389,7 +599,7 @@ public:
assert(N.getNode() == 0 && "Already set a value for this node!");
N = NewN;
}
-
+
void setUnusedArgValue(const Value *V, SDValue NewN) {
SDValue &N = UnusedArgNodeMap[V];
assert(N.getNode() == 0 && "Already set a value for this node!");
@@ -410,6 +620,12 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
MachineBasicBlock *LandingPad = NULL);
+ std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI,
+ unsigned ArgIdx,
+ unsigned NumArgs,
+ SDValue Callee,
+ bool useVoidTy = false);
+
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
@@ -451,6 +667,9 @@ private:
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
+ void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB);
+ void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
void visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
@@ -461,7 +680,7 @@ public:
void visitJumpTable(JumpTable &JT);
void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
-
+
private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
@@ -502,6 +721,7 @@ private:
void visitPtrToInt(const User &I);
void visitIntToPtr(const User &I);
void visitBitCast(const User &I);
+ void visitAddrSpaceCast(const User &I);
void visitExtractElement(const User &I);
void visitInsertElement(const User &I);
@@ -523,6 +743,11 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitMemChrCall(const CallInst &I);
+ bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
+ bool visitStrCmpCall(const CallInst &I);
+ bool visitStrLenCall(const CallInst &I);
+ bool visitStrNLenCall(const CallInst &I);
bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
@@ -535,6 +760,8 @@ private:
void visitVAArg(const VAArgInst &I);
void visitVAEnd(const CallInst &I);
void visitVACopy(const CallInst &I);
+ void visitStackmap(const CallInst &I);
+ void visitPatchpoint(const CallInst &I);
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
@@ -543,10 +770,13 @@ private:
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
+ void processIntegerCallValue(const Instruction &I,
+ SDValue Value, bool IsSigned);
+
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
/// EmitFuncArgumentDbgValue - If V is a function argument then create
- /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
int64_t Offset, const SDValue &N);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index d8ee221..c04a08d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -142,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCEIL: return "fceil";
case ISD::FRINT: return "frint";
case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FROUND: return "fround";
case ISD::FEXP: return "fexp";
case ISD::FEXP2: return "fexp2";
case ISD::FLOG: return "flog";
@@ -223,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::BITCAST: return "bitcast";
+ case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP32: return "fp16_to_fp32";
case ISD::FP32_TO_FP16: return "fp32_to_fp16";
@@ -484,6 +486,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << " " << offset;
if (unsigned int TF = BA->getTargetFlags())
OS << " [TF=" << TF << ']';
+ } else if (const AddrSpaceCastSDNode *ASC =
+ dyn_cast<AddrSpaceCastSDNode>(this)) {
+ OS << '['
+ << ASC->getSrcAddressSpace()
+ << " -> "
+ << ASC->getDestAddressSpace()
+ << ']';
}
if (unsigned Order = getIROrder())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 01da51c..3a0cfa1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -223,6 +223,44 @@ defaultListDAGScheduler("default", "Best scheduler for the target",
namespace llvm {
//===--------------------------------------------------------------------===//
+ /// \brief This class is used by SelectionDAGISel to temporarily override
+ /// the optimization level on a per-function basis.
+ class OptLevelChanger {
+ SelectionDAGISel &IS;
+ CodeGenOpt::Level SavedOptLevel;
+ bool SavedFastISel;
+
+ public:
+ OptLevelChanger(SelectionDAGISel &ISel,
+ CodeGenOpt::Level NewOptLevel) : IS(ISel) {
+ SavedOptLevel = IS.OptLevel;
+ if (NewOptLevel == SavedOptLevel)
+ return;
+ IS.OptLevel = NewOptLevel;
+ IS.TM.setOptLevel(NewOptLevel);
+ SavedFastISel = IS.TM.Options.EnableFastISel;
+ if (NewOptLevel == CodeGenOpt::None)
+ IS.TM.setFastISel(true);
+ DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction()->getName() << "\n");
+ DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
+ << " ; After: -O" << NewOptLevel << "\n");
+ }
+
+ ~OptLevelChanger() {
+ if (IS.OptLevel == SavedOptLevel)
+ return;
+ DEBUG(dbgs() << "\nRestoring optimization level for Function "
+ << IS.MF->getFunction()->getName() << "\n");
+ DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel
+ << " ; After: -O" << SavedOptLevel << "\n");
+ IS.OptLevel = SavedOptLevel;
+ IS.TM.setOptLevel(SavedOptLevel);
+ IS.TM.setFastISel(SavedFastISel);
+ }
+ };
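
The class above is a scope guard: construction overrides the optimization level (and fast-isel setting), destruction restores both, so every return path out of the enclosing scope restores the saved state; the patch instantiates it as a local in runOnMachineFunction (see the hunk below). As a rough standalone illustration of the same pattern, not LLVM code (ScopedOptLevel and the global OptLevel are invented names):

    #include <cassert>

    static int OptLevel = 2;               // stands in for IS.OptLevel / TM options

    class ScopedOptLevel {
      int Saved;
    public:
      explicit ScopedOptLevel(int NewLevel) : Saved(OptLevel) { OptLevel = NewLevel; }
      ~ScopedOptLevel() { OptLevel = Saved; }
    };

    int main() {
      {
        ScopedOptLevel Guard(0);           // e.g. for an optnone function
        assert(OptLevel == 0);
      }                                    // destructor restores the previous level
      assert(OptLevel == 2);
      return 0;
    }
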
+
+ //===--------------------------------------------------------------------===//
/// createDefaultScheduler - This creates an instruction scheduler appropriate
/// for the target.
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
@@ -230,7 +268,7 @@ namespace llvm {
const TargetLowering *TLI = IS->getTargetLowering();
const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
- if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() ||
+ if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::RegPressure)
@@ -356,6 +394,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
const Function &Fn = *mf.getFunction();
const TargetInstrInfo &TII = *TM.getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetLowering *TLI = TM.getTargetLowering();
MF = &mf;
RegInfo = &MF->getRegInfo();
@@ -369,11 +408,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
ST.resetSubtargetFeatures(MF);
TM.resetTargetOptions(MF);
+ // Reset OptLevel to None for optnone functions.
+ CodeGenOpt::Level NewOptLevel = OptLevel;
+ if (Fn.hasFnAttribute(Attribute::OptimizeNone))
+ NewOptLevel = CodeGenOpt::None;
+ OptLevelChanger OLC(*this, NewOptLevel);
+
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
- CurDAG->init(*MF, TTI);
+ CurDAG->init(*MF, TTI, TLI);
FuncInfo->set(Fn, *MF);
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -408,9 +453,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
EntryMBB->insert(EntryMBB->begin(), MI);
else {
MachineInstr *Def = RegInfo->getVRegDef(Reg);
- MachineBasicBlock::iterator InsertPos = Def;
- // FIXME: VR def may not be in entry block.
- Def->getParent()->insert(llvm::next(InsertPos), MI);
+ if (Def) {
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(llvm::next(InsertPos), MI);
+ } else
+ DEBUG(dbgs() << "Dropping debug info for dead vreg"
+ << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
}
// If Reg is live-in then update debug info to track its copy in a vreg.
@@ -422,7 +471,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock::iterator InsertPos = Def;
const MDNode *Variable =
MI->getOperand(MI->getNumOperands()-1).getMetadata();
- bool IsIndirect = MI->getOperand(1).isImm();
+ bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
// Def is never a terminator here, so it is ok to increment InsertPos.
BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
@@ -497,6 +546,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (J == E) break;
To = J->second;
}
+ // Make sure the new register has a sufficiently constrained register class.
+ if (TargetRegisterInfo::isVirtualRegister(From) &&
+ TargetRegisterInfo::isVirtualRegister(To))
+ MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
MRI.replaceRegWith(From, To);
}
@@ -617,6 +670,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
+ CurDAG->NewNodesMustHaveLegalTypes = true;
+
if (Changed) {
if (ViewDAGCombineLT)
CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
@@ -1140,6 +1195,91 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
delete FastIS;
SDB->clearDanglingDebugInfo();
+ SDB->SPDescriptor.resetPerFunctionState();
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A Terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI->isCopy() && !MI->isImplicitDef())
+ // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ return MI->isDebugValue();
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI->operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI->isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI->operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() ||
+ (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) &&
+ TargetRegisterInfo::isPhysicalRegister(OPI2->getReg())))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its success stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point can not travel across basic
+/// blocks. Luckily, selectiondag always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+static MachineBasicBlock::iterator
+FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {
+ MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+ // If the first terminator is also the first instruction in the block, there
+ // is nothing before it to splice off.
+ if (SplitPoint == BB->begin())
+ return SplitPoint;
+
+ MachineBasicBlock::iterator Start = BB->begin();
+ MachineBasicBlock::iterator Previous = SplitPoint;
+ --Previous;
+
+ while (MIIsInTerminatorSequence(Previous)) {
+ SplitPoint = Previous;
+ if (Previous == Start)
+ break;
+ --Previous;
+ }
+
+ return SplitPoint;
}
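
To make the "terminator sequence" idea concrete, here is a toy standalone sketch; it is not LLVM code (MIKind, inTerminatorSequence and findSplitPoint are invented, and the membership test is deliberately simplified, ignoring the operand and physical-register checks performed above). It shows the backward walk from the first terminator that pulls the preceding copies, implicit defs and debug values into the region to be spliced:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    enum class MIKind { Ordinary, Copy, ImplicitDef, DebugValue, Return };

    // Simplified membership test; the real predicate also inspects the operands.
    static bool inTerminatorSequence(MIKind K) {
      return K == MIKind::Copy || K == MIKind::ImplicitDef || K == MIKind::DebugValue;
    }

    // Walk backwards from the first terminator, pulling the preceding sequence
    // members along; the result is the index where the splice should start.
    static std::size_t findSplitPoint(const std::vector<MIKind> &BB,
                                      std::size_t FirstTerminator) {
      std::size_t Split = FirstTerminator;
      while (Split > 0 && inTerminatorSequence(BB[Split - 1]))
        --Split;
      return Split;
    }

    int main() {
      // ..., add, COPY vreg -> physreg, DBG_VALUE, RET  =>  split before the COPY.
      std::vector<MIKind> BB = {MIKind::Ordinary, MIKind::Copy,
                                MIKind::DebugValue, MIKind::Return};
      assert(findSplitPoint(BB, 3) == 1);
      return 0;
    }
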
void
@@ -1152,11 +1292,13 @@ SelectionDAGISel::FinishBasicBlock() {
<< FuncInfo->PHINodesToUpdate[i].first
<< ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+ const bool MustUpdatePHINodes = SDB->SwitchCases.empty() &&
+ SDB->JTCases.empty() &&
+ SDB->BitTestCases.empty();
+
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
- if (SDB->SwitchCases.empty() &&
- SDB->JTCases.empty() &&
- SDB->BitTestCases.empty()) {
+ if (MustUpdatePHINodes) {
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
@@ -1165,9 +1307,54 @@ SelectionDAGISel::FinishBasicBlock() {
continue;
PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
- return;
}
+ // Handle stack protector.
+ if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
+
+ // Find the split point to split the parent mbb. At the same time copy all
+ // physical registers used in the tail of parent mbb into virtual registers
+ // before the split point and back into physical registers after the split
+ // point. This prevents us from needing to deal with Live-ins and many other
+ // register allocation issues caused by us splitting the parent mbb. The
+ // register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint =
+ FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc());
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
+ SplitPoint,
+ ParentMBB->end());
+
+ // Emit the guard comparison and the branches to the success/failure MBBs in
+ // the parent BB.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt = ParentMBB->end();
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // CodeGen Failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB();
+ if (!FailureMBB->size()) {
+ FuncInfo->MBB = FailureMBB;
+ FuncInfo->InsertPt = FailureMBB->end();
+ SDB->visitSPDescriptorFailure(SDB->SPDescriptor);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ }
+
+ // If we updated PHI Nodes, return early.
+ if (MustUpdatePHINodes)
+ return;
+
for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
if (!SDB->BitTestCases[i].Emitted) {
@@ -1741,15 +1928,15 @@ WalkChainUsers(const SDNode *ChainedNode,
SDNode *User = *UI;
+ if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
// If we see an already-selected machine node, then we've gone beyond the
// pattern that we're selecting down into the already selected chunk of the
// DAG.
- if (User->isMachineOpcode() ||
- User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
- continue;
-
unsigned UserOpcode = User->getOpcode();
- if (UserOpcode == ISD::CopyToReg ||
+ if (User->isMachineOpcode() ||
+ UserOpcode == ISD::CopyToReg ||
UserOpcode == ISD::CopyFromReg ||
UserOpcode == ISD::INLINEASM ||
UserOpcode == ISD::EH_LABEL ||
@@ -1886,7 +2073,6 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
}
}
- SDValue Res;
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
@@ -1962,6 +2148,18 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N == RecordedNodes[RecNo].first;
}
+/// CheckChildSame - Implements OP_CheckChildXSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
+ RecordedNodes);
+}
+
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
@@ -2076,6 +2274,13 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckSame:
Result = !::CheckSame(Table, Index, N, RecordedNodes);
return Index;
+ case SelectionDAGISel::OPC_CheckChild0Same:
+ case SelectionDAGISel::OPC_CheckChild1Same:
+ case SelectionDAGISel::OPC_CheckChild2Same:
+ case SelectionDAGISel::OPC_CheckChild3Same:
+ Result = !::CheckChildSame(Table, Index, N, RecordedNodes,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
+ return Index;
case SelectionDAGISel::OPC_CheckPatternPredicate:
Result = !::CheckPatternPredicate(Table, Index, SDISel);
return Index;
@@ -2373,6 +2578,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckSame:
if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
continue;
+
+ case OPC_CheckChild0Same: case OPC_CheckChild1Same:
+ case OPC_CheckChild2Same: case OPC_CheckChild3Same:
+ if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes,
+ Opcode-OPC_CheckChild0Same))
+ break;
+ continue;
+
case OPC_CheckPatternPredicate:
if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
continue;
@@ -2432,7 +2645,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
}
case OPC_SwitchType: {
- MVT CurNodeVT = N.getValueType().getSimpleVT();
+ MVT CurNodeVT = N.getSimpleValueType();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
while (1) {
@@ -2544,7 +2757,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_EmitConvertToTarget: {
// Convert from IMM/FPIMM to target version.
unsigned RecNo = MatcherTable[MatcherIndex++];
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
@@ -2569,7 +2782,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Read all of the chained nodes.
unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
// FIXME: What if other value results of the node have uses not matched
@@ -2606,7 +2819,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Read all of the chained nodes.
for (unsigned i = 0; i != NumChains; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
// FIXME: What if other value results of the node have uses not matched
@@ -2633,7 +2846,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_EmitCopyToReg: {
unsigned RecNo = MatcherTable[MatcherIndex++];
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
if (InputChain.getNode() == 0)
@@ -2650,7 +2863,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_EmitNodeXForm: {
unsigned XFormNo = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
continue;
@@ -2827,7 +3040,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
- assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults");
GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
}
continue;
@@ -2844,7 +3057,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (ResSlot & 128)
ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
- assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
+ assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
SDValue Res = RecordedNodes[ResSlot].first;
assert(i < NodeToMatch->getNumValues() &&
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e3c6306..82b068d 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -64,13 +64,29 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
return isUsedByReturnOnly(Node, Chain);
}
+/// \brief Set CallLoweringInfo attribute flags based on a call instruction
+/// and called function attributes.
+void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned AttrIdx) {
+ isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
+ isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
+ isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
+ isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
+ isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
+ isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+ Alignment = CS->getParamAlignment(AttrIdx);
+}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
-SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
- RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
- bool isSigned, SDLoc dl) const {
+std::pair<SDValue, SDValue>
+TargetLowering::makeLibCall(SelectionDAG &DAG,
+ RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, SDLoc dl,
+ bool doesNotReturn,
+ bool isReturnValueUsed) const {
TargetLowering::ArgListTy Args;
Args.reserve(NumOps);
@@ -89,11 +105,9 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, getLibcallCallingConv(LC),
/*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
- std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI);
-
- return CallInfo.first;
+ doesNotReturn, isReturnValueUsed, Callee, Args,
+ DAG, dl);
+ return LowerCallTo(CLI);
}
@@ -183,14 +197,16 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Use the target specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = { NewLHS, NewRHS };
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
+ dl).first;
NewRHS = DAG.getConstant(0, RetVT);
CCCode = getCmpLibcallCC(LC1);
if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
SDValue Tmp = DAG.getNode(ISD::SETCC, dl,
getSetCCResultType(*DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
+ dl).first;
NewLHS = DAG.getNode(ISD::SETCC, dl,
getSetCCResultType(*DAG.getContext(), RetVT), NewLHS,
NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
@@ -632,6 +648,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
NarrowShl));
}
+ // Repeat the SHL optimization above in cases where an extension
+ // intervenes: (shl (anyext (shr x, c1)), c2) to
+ // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
+ // aren't demanded (as above) and that the shifted upper c1 bits of
+ // x aren't demanded.
+ if (InOp.hasOneUse() &&
+ InnerOp.getOpcode() == ISD::SRL &&
+ InnerOp.hasOneUse() &&
+ isa<ConstantSDNode>(InnerOp.getOperand(1))) {
+ uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+ ->getZExtValue();
+ if (InnerShAmt < ShAmt &&
+ InnerShAmt < InnerBits &&
+ NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
+ NewMask.trunc(ShAmt) == 0) {
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt,
+ Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+ NewExt, NewSA));
+ }
+ }
}
KnownZero <<= SA->getZExtValue();
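
A concrete instance of the transform added above, assuming for illustration that the any_extend is realized as a zero extend (a valid choice of its unspecified bits) and using invented variable names: with x of type i32, c1 = 8, c2 = 16 and only bits 16..39 demanded, the code's conditions hold (no demanded bits below 16, none at or above 40) and both forms agree on every demanded bit. A small self-contained check:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t x = 0xDEADBEEF;                    // arbitrary 32-bit input
      const unsigned c1 = 8, c2 = 16;
      const uint64_t Demanded = 0xFFFFFFULL << 16;      // bits 16..39 only

      // shl (anyext (srl x, c1)), c2   -- anyext modeled as zext for the demo
      uint64_t before = static_cast<uint64_t>(x >> c1) << c2;
      // shl (anyext x), c2 - c1
      uint64_t after = static_cast<uint64_t>(x) << (c2 - c1);

      assert((before & Demanded) == (after & Demanded));
      return 0;
    }
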
@@ -722,13 +763,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits)
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
Op.getOperand(0),
Op.getOperand(1)));
- } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
- KnownOne |= HighBits;
+
+ int Log2 = NewMask.exactLogBase2();
+ if (Log2 >= 0) {
+ // The bit must come from the sign.
+ SDValue NewSA =
+ TLO.DAG.getConstant(BitWidth - 1 - Log2,
+ Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0), NewSA));
}
+
+ if (KnownOne.intersects(SignBit))
+ // New bits are known one.
+ KnownOne |= HighBits;
}
break;
case ISD::SIGN_EXTEND_INREG: {
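
The new single-demanded-bit case in the SRA handling above relies on the fact that, once the earlier early-out has not fired, the one demanded bit must lie among the sign-filled high bits, so it can be read straight off the sign bit with a logical shift of BitWidth - 1 - Log2. A small standalone illustration, not LLVM code (sra32 and the chosen constants are invented): for a 32-bit value shifted arithmetically by 30 with only bit 3 demanded, srl by 31 - 3 = 28 yields the same demanded bit:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Portable arithmetic shift right for a 32-bit value, Amt in [1, 31].
    static uint32_t sra32(uint32_t V, unsigned Amt) {
      uint32_t Shifted = V >> Amt;
      if (V & 0x80000000u)
        Shifted |= ~0u << (32 - Amt);    // replicate the sign into the high bits
      return Shifted;
    }

    int main() {
      const uint32_t Demanded = 1u << 3;       // NewMask with exactly bit 3 set
      for (uint32_t x : {0x80000001u, 0x00000001u}) {
        uint32_t before = sra32(x, 30);        // sra x, 30
        uint32_t after = x >> (32 - 1 - 3);    // srl x, 28 (BitWidth - 1 - Log2)
        assert((before & Demanded) == (after & Demanded));
      }
      return 0;
    }
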
@@ -1077,13 +1129,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
case ISD::SETFALSE:
case ISD::SETFALSE2: return DAG.getConstant(0, VT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: return DAG.getConstant(1, VT);
+ case ISD::SETTRUE2: {
+ TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector());
+ return DAG.getConstant(
+ Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
+ }
}
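
The SETTRUE/SETTRUE2 change above matters because targets with ZeroOrNegativeOneBooleanContent (the common convention for vector compares) represent a true lane as all-ones rather than 1, and consumers that mask a value with the compare result depend on that. A tiny standalone illustration, not LLVM code (the names and constants are invented):

    #include <cassert>
    #include <cstdint>

    int main() {
      // One i32 lane of a compare result under the two boolean conventions.
      const uint32_t TrueZeroOrOne = 1u;
      const uint32_t TrueAllOnes = static_cast<uint32_t>(-1);   // 0xFFFFFFFF

      // Masking with the compare result only selects the value when a true
      // lane is all-ones.
      const uint32_t x = 0xDEADBEEFu;
      assert((x & TrueAllOnes) == x);
      assert((x & TrueZeroOrOne) != x);
      return 0;
    }
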
// Ensure that the constant occurs on the RHS, and fold constant
// comparisons.
- if (isa<ConstantSDNode>(N0.getNode()))
- return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+ ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
+ if (isa<ConstantSDNode>(N0.getNode()) &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
@@ -1178,6 +1237,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
if (DCI.isBeforeLegalize() &&
+ !ISD::isSignedIntSetCC(Cond) &&
N0.getOpcode() == ISD::AND && C1 == 0 &&
N0.getNode()->hasOneUse() &&
isa<LoadSDNode>(N0.getOperand(0)) &&
@@ -1322,7 +1382,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
CC = ISD::getSetCCInverse(CC,
N0.getOperand(0).getValueType().isInteger());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
if ((N0.getOpcode() == ISD::XOR ||
@@ -1759,16 +1821,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
if (ValueHasExactlyOneBitSet(N1, DAG)) {
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
- SDValue Zero = DAG.getConstant(0, N1.getValueType());
- return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+ SDValue Zero = DAG.getConstant(0, N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+ }
}
}
if (N1.getOpcode() == ISD::AND)
if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
if (ValueHasExactlyOneBitSet(N0, DAG)) {
Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
- SDValue Zero = DAG.getConstant(0, N0.getValueType());
- return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(Cond, N1.getSimpleValueType())) {
+ SDValue Zero = DAG.getConstant(0, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+ }
}
}
}
@@ -1993,7 +2061,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const {
- if (Constraint[0] != '{')
+ if (Constraint.empty() || Constraint[0] != '{')
return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
@@ -2142,8 +2210,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
break;
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
- OpInfo.ConstraintVT = MVT::getIntegerVT(
- 8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
+ unsigned PtrSize
+ = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace());
+ OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
} else {
OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
deleted file mode 100644
index 6c826de..0000000
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ /dev/null
@@ -1,1152 +0,0 @@
-//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a shrink wrapping variant of prolog/epilog insertion:
-// - Spills and restores of callee-saved registers (CSRs) are placed in the
-// machine CFG to tightly surround their uses so that execution paths that
-// do not use CSRs do not pay the spill/restore penalty.
-//
-// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
-// loop the spills are placed in the loop preheader, and restores are
-// placed in the loop exit nodes (the successors of loop _exiting_ nodes).
-//
-// - Covering paths without CSR uses:
-// If a region in a CFG uses CSRs and has multiple entry and/or exit points,
-// the use info for the CSRs inside the region is propagated outward in the
-// CFG to ensure validity of the spill/restore placements. This decreases
-// the effectiveness of shrink wrapping but does not require edge splitting
-// in the machine CFG.
-//
-// This shrink wrapping implementation uses an iterative analysis to determine
-// which basic blocks require spills and restores for CSRs.
-//
-// This pass uses MachineDominators and MachineLoopInfo. Loop information
-// is used to prevent placement of callee-saved register spills/restores
-// in the bodies of loops.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "shrink-wrap"
-
-#include "PrologEpilogInserter.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include <sstream>
-
-using namespace llvm;
-
-STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
-
-// Shrink Wrapping:
-static cl::opt<bool>
-ShrinkWrapping("shrink-wrap",
- cl::desc("Shrink wrap callee-saved register spills/restores"));
-
-// Shrink wrap only the specified function, a debugging aid.
-static cl::opt<std::string>
-ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
- cl::desc("Shrink wrap the specified function"),
- cl::value_desc("funcname"),
- cl::init(""));
-
-// Debugging level for shrink wrapping.
-enum ShrinkWrapDebugLevel {
- Disabled, BasicInfo, Iterations, Details
-};
-
-static cl::opt<enum ShrinkWrapDebugLevel>
-ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
- cl::desc("Print shrink wrapping debugging information"),
- cl::values(
- clEnumVal(Disabled , "disable debug output"),
- clEnumVal(BasicInfo , "print basic DF sets"),
- clEnumVal(Iterations, "print SR sets for each iteration"),
- clEnumVal(Details , "print all DF sets"),
- clEnumValEnd));
-
-
-void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- if (ShrinkWrapping || ShrinkWrapFunc != "") {
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- }
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<TargetPassConfig>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-//===----------------------------------------------------------------------===//
-// ShrinkWrapping implementation
-//===----------------------------------------------------------------------===//
-
-// Conveniences for dealing with machine loops.
-MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
- assert(LP && "Machine loop is NULL.");
- MachineBasicBlock* PHDR = LP->getLoopPreheader();
- MachineLoop* PLP = LP->getParentLoop();
- while (PLP) {
- PHDR = PLP->getLoopPreheader();
- PLP = PLP->getParentLoop();
- }
- return PHDR;
-}
-
-MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
- if (LP == 0)
- return 0;
- MachineLoop* PLP = LP->getParentLoop();
- while (PLP) {
- LP = PLP;
- PLP = PLP->getParentLoop();
- }
- return LP;
-}
-
-bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
- return (MBB && !MBB->empty() && MBB->back().isReturn());
-}
-
-// Initialize shrink wrapping DFA sets, called before iterations.
-void PEI::clearAnticAvailSets() {
- AnticIn.clear();
- AnticOut.clear();
- AvailIn.clear();
- AvailOut.clear();
-}
-
-// Clear all sets constructed by shrink wrapping.
-void PEI::clearAllSets() {
- ReturnBlocks.clear();
- clearAnticAvailSets();
- UsedCSRegs.clear();
- CSRUsed.clear();
- TLLoops.clear();
- CSRSave.clear();
- CSRRestore.clear();
-}
-
-// Initialize all shrink wrapping data.
-void PEI::initShrinkWrappingInfo() {
- clearAllSets();
- EntryBlock = 0;
-#ifndef NDEBUG
- HasFastExitPath = false;
-#endif
- ShrinkWrapThisFunction = ShrinkWrapping;
- // DEBUG: enable or disable shrink wrapping for the current function
- // via --shrink-wrap-func=<funcname>.
-#ifndef NDEBUG
- if (ShrinkWrapFunc != "") {
- std::string MFName = MF->getName().str();
- ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
- }
-#endif
-}
-
-
-/// placeCSRSpillsAndRestores - determine which MBBs of the function
-/// need save, restore code for callee-saved registers by doing a DF analysis
-/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
-/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
-/// is used to ensure that CSR save/restore code is not placed inside loops.
-/// This function computes the maps of MBBs -> CSRs to spill and restore
-/// in CSRSave, CSRRestore.
-///
-/// If shrink wrapping is not being performed, place all spills in
-/// the entry block, all restores in return blocks. In this case,
-/// CSRSave has a single mapping, CSRRestore has mappings for each
-/// return block.
-///
-void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
-
- DEBUG(MF = &Fn);
-
- initShrinkWrappingInfo();
-
- DEBUG(if (ShrinkWrapThisFunction) {
- dbgs() << "Place CSR spills/restores for "
- << MF->getName() << "\n";
- });
-
- if (calculateSets(Fn))
- placeSpillsAndRestores(Fn);
-}
-
-/// calcAnticInOut - calculate the anticipated in/out reg sets
-/// for the given MBB by looking forward in the MCFG at MBB's
-/// successors.
-///
-bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
- bool changed = false;
-
- // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
- SmallVector<MachineBasicBlock*, 4> successors;
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock* SUCC = *SI;
- if (SUCC != MBB)
- successors.push_back(SUCC);
- }
-
- unsigned i = 0, e = successors.size();
- if (i != e) {
- CSRegSet prevAnticOut = AnticOut[MBB];
- MachineBasicBlock* SUCC = successors[i];
-
- AnticOut[MBB] = AnticIn[SUCC];
- for (++i; i != e; ++i) {
- SUCC = successors[i];
- AnticOut[MBB] &= AnticIn[SUCC];
- }
- if (prevAnticOut != AnticOut[MBB])
- changed = true;
- }
-
- // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
- CSRegSet prevAnticIn = AnticIn[MBB];
- AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
- if (prevAnticIn != AnticIn[MBB])
- changed = true;
- return changed;
-}
-
-/// calcAvailInOut - calculate the available in/out reg sets
-/// for the given MBB by looking backward in the MCFG at MBB's
-/// predecessors.
-///
-bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
- bool changed = false;
-
- // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
- SmallVector<MachineBasicBlock*, 4> predecessors;
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock* PRED = *PI;
- if (PRED != MBB)
- predecessors.push_back(PRED);
- }
-
- unsigned i = 0, e = predecessors.size();
- if (i != e) {
- CSRegSet prevAvailIn = AvailIn[MBB];
- MachineBasicBlock* PRED = predecessors[i];
-
- AvailIn[MBB] = AvailOut[PRED];
- for (++i; i != e; ++i) {
- PRED = predecessors[i];
- AvailIn[MBB] &= AvailOut[PRED];
- }
- if (prevAvailIn != AvailIn[MBB])
- changed = true;
- }
-
- // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
- CSRegSet prevAvailOut = AvailOut[MBB];
- AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
- if (prevAvailOut != AvailOut[MBB])
- changed = true;
- return changed;
-}
-
-/// calculateAnticAvail - build the sets anticipated and available
-/// registers in the MCFG of the current function iteratively,
-/// doing a combined forward and backward analysis.
-///
-void PEI::calculateAnticAvail(MachineFunction &Fn) {
- // Initialize data flow sets.
- clearAnticAvailSets();
-
- // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
- bool changed = true;
- unsigned iterations = 0;
- while (changed) {
- changed = false;
- ++iterations;
- for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
-
- // Calculate anticipated in, out regs at MBB from
- // anticipated at successors of MBB.
- changed |= calcAnticInOut(MBB);
-
- // Calculate available in, out regs at MBB from
- // available at predecessors of MBB.
- changed |= calcAvailInOut(MBB);
- }
- }
-
- DEBUG({
- if (ShrinkWrapDebugging >= Details) {
- dbgs()
- << "-----------------------------------------------------------\n"
- << " Antic/Avail Sets:\n"
- << "-----------------------------------------------------------\n"
- << "iterations = " << iterations << "\n"
- << "-----------------------------------------------------------\n"
- << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
- << "-----------------------------------------------------------\n";
-
- for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
- dumpSets(MBB);
- }
-
- dbgs()
- << "-----------------------------------------------------------\n";
- }
- });
-}
-
-/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
-/// of the loop given by LP and its parent loops. This prevents spills/restores
-/// from being placed in the bodies of loops.
-///
-void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
- if (! MBB || !LP)
- return;
-
- std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
- for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
- MachineBasicBlock* LBB = loopBlocks[i];
- if (LBB == MBB)
- continue;
- if (CSRUsed[LBB].contains(CSRUsed[MBB]))
- continue;
- CSRUsed[LBB] |= CSRUsed[MBB];
- }
-}
-
-/// calculateSets - collect the CSRs used in this function, compute
-/// the DF sets that describe the initial minimal regions in the
-/// Machine CFG around which CSR spills and restores must be placed.
-///
-/// Additionally, this function decides if shrink wrapping should
-/// be disabled for the current function, checking the following:
-/// 1. the current function has more than 500 MBBs: heuristic limit
-/// on function size to reduce compile time impact of the current
-/// iterative algorithm.
-/// 2. all CSRs are used in the entry block.
-/// 3. all CSRs are used in all immediate successors of the entry block.
-/// 4. all CSRs are used in a subset of blocks, each of which dominates
-/// all return blocks. These blocks, taken as a subgraph of the MCFG,
-/// are equivalent to the entry block since all execution paths pass
-/// through them.
-///
-bool PEI::calculateSets(MachineFunction &Fn) {
- // Sets used to compute spill, restore placement sets.
- const std::vector<CalleeSavedInfo> CSI =
- Fn.getFrameInfo()->getCalleeSavedInfo();
-
- // If no CSRs used, we are done.
- if (CSI.empty()) {
- DEBUG(if (ShrinkWrapThisFunction)
- dbgs() << "DISABLED: " << Fn.getName()
- << ": uses no callee-saved registers\n");
- return false;
- }
-
- // Save refs to entry and return blocks.
- EntryBlock = Fn.begin();
- for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
- MBB != E; ++MBB)
- if (isReturnBlock(MBB))
- ReturnBlocks.push_back(MBB);
-
- // Determine if this function has fast exit paths.
- DEBUG(if (ShrinkWrapThisFunction)
- findFastExitPath());
-
- // Limit shrink wrapping via the current iterative bit vector
- // implementation to functions with <= 500 MBBs.
- if (Fn.size() > 500) {
- DEBUG(if (ShrinkWrapThisFunction)
- dbgs() << "DISABLED: " << Fn.getName()
- << ": too large (" << Fn.size() << " MBBs)\n");
- ShrinkWrapThisFunction = false;
- }
-
- // Return now if not shrink wrapping.
- if (! ShrinkWrapThisFunction)
- return false;
-
- // Collect set of used CSRs.
- for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
- UsedCSRegs.set(inx);
- }
-
- // Walk instructions in all MBBs, create CSRUsed[] sets, choose
- // whether or not to shrink wrap this function.
- MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
-
- bool allCSRUsesInEntryBlock = true;
- for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
- for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
- for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
- unsigned Reg = CSI[inx].getReg();
- // If instruction I reads or modifies Reg, add it to UsedCSRegs,
- // CSRUsed map for the current block.
- for (unsigned opInx = 0, opEnd = I->getNumOperands();
- opInx != opEnd; ++opInx) {
- const MachineOperand &MO = I->getOperand(opInx);
- if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
- continue;
- unsigned MOReg = MO.getReg();
- if (!MOReg)
- continue;
- if (MOReg == Reg ||
- (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- TargetRegisterInfo::isPhysicalRegister(Reg) &&
- TRI->isSubRegister(Reg, MOReg))) {
- // CSR Reg is defined/used in block MBB.
- CSRUsed[MBB].set(inx);
- // Check for uses in EntryBlock.
- if (MBB != EntryBlock)
- allCSRUsesInEntryBlock = false;
- }
- }
- }
- }
-
- if (CSRUsed[MBB].empty())
- continue;
-
- // Propagate CSRUsed[MBB] in loops
- if (MachineLoop* LP = LI.getLoopFor(MBB)) {
- // Add top level loop to work list.
- MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
- MachineLoop* PLP = getTopLevelLoopParent(LP);
-
- if (! HDR) {
- HDR = PLP->getHeader();
- assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
- MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
- HDR = *PI;
- }
- TLLoops[HDR] = PLP;
-
- // Push uses from inside loop to its parent loops,
- // or to all other MBBs in its loop.
- if (LP->getLoopDepth() > 1) {
- for (MachineLoop* PLP = LP->getParentLoop(); PLP;
- PLP = PLP->getParentLoop()) {
- propagateUsesAroundLoop(MBB, PLP);
- }
- } else {
- propagateUsesAroundLoop(MBB, LP);
- }
- }
- }
-
- if (allCSRUsesInEntryBlock) {
- DEBUG(dbgs() << "DISABLED: " << Fn.getName()
- << ": all CSRs used in EntryBlock\n");
- ShrinkWrapThisFunction = false;
- } else {
- bool allCSRsUsedInEntryFanout = true;
- for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
- SE = EntryBlock->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock* SUCC = *SI;
- if (CSRUsed[SUCC] != UsedCSRegs)
- allCSRsUsedInEntryFanout = false;
- }
- if (allCSRsUsedInEntryFanout) {
- DEBUG(dbgs() << "DISABLED: " << Fn.getName()
- << ": all CSRs used in imm successors of EntryBlock\n");
- ShrinkWrapThisFunction = false;
- }
- }
-
- if (ShrinkWrapThisFunction) {
- // Check if MBB uses CSRs and dominates all exit nodes.
- // Such nodes are equiv. to the entry node w.r.t.
- // CSR uses: every path through the function must
- // pass through this node. If each CSR is used at least
- // once by these nodes, shrink wrapping is disabled.
- CSRegSet CSRUsedInChokePoints;
- for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
- if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
- continue;
- bool dominatesExitNodes = true;
- for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
- if (! DT.dominates(MBB, ReturnBlocks[ri])) {
- dominatesExitNodes = false;
- break;
- }
- if (dominatesExitNodes) {
- CSRUsedInChokePoints |= CSRUsed[MBB];
- if (CSRUsedInChokePoints == UsedCSRegs) {
- DEBUG(dbgs() << "DISABLED: " << Fn.getName()
- << ": all CSRs used in choke point(s) at "
- << getBasicBlockName(MBB) << "\n");
- ShrinkWrapThisFunction = false;
- break;
- }
- }
- }
- }
-
- // Return now if we have decided not to apply shrink wrapping
- // to the current function.
- if (! ShrinkWrapThisFunction)
- return false;
-
- DEBUG({
- dbgs() << "ENABLED: " << Fn.getName();
- if (HasFastExitPath)
- dbgs() << " (fast exit path)";
- dbgs() << "\n";
- if (ShrinkWrapDebugging >= BasicInfo) {
- dbgs() << "------------------------------"
- << "-----------------------------\n";
- dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
- if (ShrinkWrapDebugging >= Details) {
- dbgs() << "------------------------------"
- << "-----------------------------\n";
- dumpAllUsed();
- }
- }
- });
-
- // Build initial DF sets to determine minimal regions in the
- // Machine CFG around which CSRs must be spilled and restored.
- calculateAnticAvail(Fn);
-
- return true;
-}
-
-/// addUsesForMEMERegion - add uses of CSRs spilled or restored in
-/// multi-entry, multi-exit (MEME) regions so spill and restore
-/// placement will not break code that enters or leaves a
-/// shrink-wrapped region by inducing spills with no matching
-/// restores or restores with no matching spills. A MEME region
-/// is a subgraph of the MCFG with multiple entry edges, multiple
-/// exit edges, or both. This code propagates use information
-/// through the MCFG until all paths requiring spills and restores
-/// _outside_ the computed minimal placement regions have been covered.
-///
-bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
- SmallVectorImpl<MachineBasicBlock *> &blks) {
- if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
- bool processThisBlock = false;
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock* SUCC = *SI;
- if (SUCC->pred_size() > 1) {
- processThisBlock = true;
- break;
- }
- }
- if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock* PRED = *PI;
- if (PRED->succ_size() > 1) {
- processThisBlock = true;
- break;
- }
- }
- }
- if (! processThisBlock)
- return false;
- }
-
- CSRegSet prop;
- if (!CSRSave[MBB].empty())
- prop = CSRSave[MBB];
- else if (!CSRRestore[MBB].empty())
- prop = CSRRestore[MBB];
- else
- prop = CSRUsed[MBB];
- if (prop.empty())
- return false;
-
- // Propagate selected bits to successors, predecessors of MBB.
- bool addedUses = false;
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock* SUCC = *SI;
- // Self-loop
- if (SUCC == MBB)
- continue;
- if (! CSRUsed[SUCC].contains(prop)) {
- CSRUsed[SUCC] |= prop;
- addedUses = true;
- blks.push_back(SUCC);
- DEBUG(if (ShrinkWrapDebugging >= Iterations)
- dbgs() << getBasicBlockName(MBB)
- << "(" << stringifyCSRegSet(prop) << ")->"
- << "successor " << getBasicBlockName(SUCC) << "\n");
- }
- }
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock* PRED = *PI;
- // Self-loop
- if (PRED == MBB)
- continue;
- if (! CSRUsed[PRED].contains(prop)) {
- CSRUsed[PRED] |= prop;
- addedUses = true;
- blks.push_back(PRED);
- DEBUG(if (ShrinkWrapDebugging >= Iterations)
- dbgs() << getBasicBlockName(MBB)
- << "(" << stringifyCSRegSet(prop) << ")->"
- << "predecessor " << getBasicBlockName(PRED) << "\n");
- }
- }
- return addedUses;
-}
-
-/// addUsesForTopLevelLoops - add uses for CSRs used inside top
-/// level loops to the exit blocks of those loops.
-///
-bool PEI::addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks) {
- bool addedUses = false;
-
- // Place restores for top level loops where needed.
- for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
- I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
- MachineBasicBlock* MBB = I->first;
- MachineLoop* LP = I->second;
- MachineBasicBlock* HDR = LP->getHeader();
- SmallVector<MachineBasicBlock*, 4> exitBlocks;
- CSRegSet loopSpills;
-
- loopSpills = CSRSave[MBB];
- if (CSRSave[MBB].empty()) {
- loopSpills = CSRUsed[HDR];
- assert(!loopSpills.empty() && "No CSRs used in loop?");
- } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
- continue;
-
- LP->getExitBlocks(exitBlocks);
- assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
- for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
- MachineBasicBlock* EXB = exitBlocks[i];
- if (! CSRUsed[EXB].contains(loopSpills)) {
- CSRUsed[EXB] |= loopSpills;
- addedUses = true;
- DEBUG(if (ShrinkWrapDebugging >= Iterations)
- dbgs() << "LOOP " << getBasicBlockName(MBB)
- << "(" << stringifyCSRegSet(loopSpills) << ")->"
- << getBasicBlockName(EXB) << "\n");
- if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
- blks.push_back(EXB);
- }
- }
- }
- return addedUses;
-}
-
-/// calcSpillPlacements - determine which CSRs should be spilled
-/// in MBB using AnticIn sets of MBB's predecessors, keeping track
-/// of changes to spilled reg sets. Add MBB to the set of blocks
-/// that need to be processed for propagating use info to cover
-/// multi-entry/exit regions.
-///
-bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
- SmallVectorImpl<MachineBasicBlock *> &blks,
- CSRegBlockMap &prevSpills) {
- bool placedSpills = false;
- // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
- CSRegSet anticInPreds;
- SmallVector<MachineBasicBlock*, 4> predecessors;
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock* PRED = *PI;
- if (PRED != MBB)
- predecessors.push_back(PRED);
- }
- unsigned i = 0, e = predecessors.size();
- if (i != e) {
- MachineBasicBlock* PRED = predecessors[i];
- anticInPreds = UsedCSRegs - AnticIn[PRED];
- for (++i; i != e; ++i) {
- PRED = predecessors[i];
- anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
- }
- } else {
- // Handle uses in entry blocks (which have no predecessors).
- // This is necessary because the DFA formulation assumes the
- // entry and (multiple) exit nodes cannot have CSR uses, which
- // is not the case in the real world.
- anticInPreds = UsedCSRegs;
- }
- // Compute spills required at MBB:
- CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
-
- if (! CSRSave[MBB].empty()) {
- if (MBB == EntryBlock) {
- for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
- CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
- } else {
- // Reset all regs spilled in MBB that are also spilled in EntryBlock.
- if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
- CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
- }
- }
- }
- placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
- prevSpills[MBB] = CSRSave[MBB];
- // Remember this block for adding restores to successor
- // blocks for multi-entry region.
- if (placedSpills)
- blks.push_back(MBB);
-
- DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
- dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRSave[MBB]) << "\n");
-
- return placedSpills;
-}
-
-/// calcRestorePlacements - determine which CSRs should be restored
-/// in MBB using AvailOut sets of MBB's successors, keeping track
-/// of changes to restored reg sets. Add MBB to the set of blocks
-/// that need to be processed for propagating use info to cover
-/// multi-entry/exit regions.
-///
-bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
- SmallVectorImpl<MachineBasicBlock *> &blks,
- CSRegBlockMap &prevRestores) {
- bool placedRestores = false;
- // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
- CSRegSet availOutSucc;
- SmallVector<MachineBasicBlock*, 4> successors;
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock* SUCC = *SI;
- if (SUCC != MBB)
- successors.push_back(SUCC);
- }
- unsigned i = 0, e = successors.size();
- if (i != e) {
- MachineBasicBlock* SUCC = successors[i];
- availOutSucc = UsedCSRegs - AvailOut[SUCC];
- for (++i; i != e; ++i) {
- SUCC = successors[i];
- availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
- }
- } else {
- if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
- // Handle uses in return blocks (which have no successors).
- // This is necessary because the DFA formulation assumes the
- // entry and (multiple) exit nodes cannot have CSR uses, which
- // is not the case in the real world.
- availOutSucc = UsedCSRegs;
- }
- }
- // Compute restores required at MBB:
- CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
-
- // Postprocess restore placements at MBB.
- // Remove the CSRs that are restored in the return blocks.
- // Lest this be confusing, note that:
- // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
- if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
- if (! CSRSave[EntryBlock].empty())
- CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
- }
- placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
- prevRestores[MBB] = CSRRestore[MBB];
- // Remember this block for adding saves to predecessor
- // blocks for multi-entry region.
- if (placedRestores)
- blks.push_back(MBB);
-
- DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
- dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
-
- return placedRestores;
-}
-
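In outline, calcSpillPlacements and calcRestorePlacements above reduce to a pair of set recurrences over the Antic/Avail dataflow solutions. Below is a minimal standalone sketch of those recurrences with std::bitset standing in for CSRegSet and hypothetical names throughout; it is an illustration only, not part of this patch, and it folds the no-predecessor/no-successor special cases into a single default.

#include <bitset>
#include <map>
#include <vector>

using RegSet = std::bitset<32>;   // stand-in for CSRegSet
using Block  = int;               // stand-in for MachineBasicBlock*

struct PlacementState {
  RegSet UsedCSRegs;
  std::map<Block, RegSet> AnticIn, AnticOut, AvailIn, AvailOut;
};

// Spills required at B: CSRs first anticipated here and not yet available,
// restricted to what no predecessor already anticipates on entry.  With no
// predecessors (the entry block) the restriction defaults to UsedCSRegs.
RegSet spillsAt(PlacementState &S, Block B, const std::vector<Block> &Preds) {
  RegSet NotAnticInPreds = S.UsedCSRegs;
  for (Block P : Preds)
    NotAnticInPreds &= S.UsedCSRegs & ~S.AnticIn[P];
  return (S.AnticIn[B] & ~S.AvailIn[B]) & NotAnticInPreds;
}

// Restores required at B: the mirror image, using Antic/Avail on exit and
// the successors' AvailOut sets.
RegSet restoresAt(PlacementState &S, Block B, const std::vector<Block> &Succs) {
  RegSet NotAvailOutSuccs = S.UsedCSRegs;
  for (Block Su : Succs)
    NotAvailOutSuccs &= S.UsedCSRegs & ~S.AvailOut[Su];
  return (S.AvailOut[B] & ~S.AnticOut[B]) & NotAvailOutSuccs;
}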
-/// placeSpillsAndRestores - place spills and restores of CSRs
-/// used in MBBs in minimal regions that contain the uses.
-///
-void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
- CSRegBlockMap prevCSRSave;
- CSRegBlockMap prevCSRRestore;
- SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
- bool changed = true;
- unsigned iterations = 0;
-
- // Iterate computation of spill and restore placements in the MCFG until:
- // 1. CSR use info has been fully propagated around the MCFG, and
- // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
- while (changed) {
- changed = false;
- ++iterations;
-
- DEBUG(if (ShrinkWrapDebugging >= Iterations)
- dbgs() << "iter " << iterations
- << " --------------------------------------------------\n");
-
- // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
- // which determines the placements of spills and restores.
- // Keep track of changes to spills, restores in each iteration to
- // minimize the total iterations.
- bool SRChanged = false;
- for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
-
- // Place spills for CSRs in MBB.
- SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
-
- // Place restores for CSRs in MBB.
- SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
- }
-
- // Add uses of CSRs used inside loops where needed.
- changed |= addUsesForTopLevelLoops(cvBlocks);
-
- // Add uses for CSRs spilled or restored at branch, join points.
- if (changed || SRChanged) {
- while (! cvBlocks.empty()) {
- MachineBasicBlock* MBB = cvBlocks.pop_back_val();
- changed |= addUsesForMEMERegion(MBB, ncvBlocks);
- }
- if (! ncvBlocks.empty()) {
- cvBlocks = ncvBlocks;
- ncvBlocks.clear();
- }
- }
-
- if (changed) {
- calculateAnticAvail(Fn);
- CSRSave.clear();
- CSRRestore.clear();
- }
- }
-
- // Check for effectiveness:
- // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
- // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
- // Gives a measure of how many CSR spills have been moved from EntryBlock
- // to minimal regions enclosing their uses.
- CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
- unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
- numSRReduced += numSRReducedThisFunc;
- DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
- dbgs() << "-----------------------------------------------------------\n";
- dbgs() << "total iterations = " << iterations << " ( "
- << Fn.getName()
- << " " << numSRReducedThisFunc
- << " " << Fn.size()
- << " )\n";
- dbgs() << "-----------------------------------------------------------\n";
- dumpSRSets();
- dbgs() << "-----------------------------------------------------------\n";
- if (numSRReducedThisFunc)
- verifySpillRestorePlacement();
- });
-}
-
-// Debugging methods.
-#ifndef NDEBUG
-/// findFastExitPath - debugging method used to detect functions
-/// with at least one path from the entry block to a return block,
-/// either directly or through a very small number of edges.
-///
-void PEI::findFastExitPath() {
- if (! EntryBlock)
- return;
-  // Find a path from EntryBlock to any return block that does not branch:
- // Entry
- // | ...
- // v |
- // B1<-----+
- // |
- // v
- // Return
- for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
- SE = EntryBlock->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock* SUCC = *SI;
-
- // Assume positive, disprove existence of fast path.
- HasFastExitPath = true;
-
- // Check the immediate successors.
- if (isReturnBlock(SUCC)) {
- if (ShrinkWrapDebugging >= BasicInfo)
- dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
- << "->" << getBasicBlockName(SUCC) << "\n";
- break;
- }
- // Traverse df from SUCC, look for a branch block.
- std::string exitPath = getBasicBlockName(SUCC);
- for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
- BE = df_end(SUCC); BI != BE; ++BI) {
- MachineBasicBlock* SBB = *BI;
- // Reject paths with branch nodes.
- if (SBB->succ_size() > 1) {
- HasFastExitPath = false;
- break;
- }
- exitPath += "->" + getBasicBlockName(SBB);
- }
- if (HasFastExitPath) {
- if (ShrinkWrapDebugging >= BasicInfo)
- dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
- << "->" << exitPath << "\n";
- break;
- }
- }
-}
-
-/// verifySpillRestorePlacement - check the current spill/restore
-/// sets for safety. Attempt to find spills without restores or
-/// restores without spills.
-/// Spills: walk df from each MBB in spill set ensuring that
-/// all CSRs spilled at MBB are restored on all paths
-/// from MBB to all exit blocks.
-/// Restores: walk idf from each MBB in restore set ensuring that
-/// all CSRs restored at MBB are spilled on all paths
-/// reaching MBB.
-///
-void PEI::verifySpillRestorePlacement() {
- unsigned numReturnBlocks = 0;
- for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
- if (isReturnBlock(MBB) || MBB->succ_size() == 0)
- ++numReturnBlocks;
- }
- for (CSRegBlockMap::iterator BI = CSRSave.begin(),
- BE = CSRSave.end(); BI != BE; ++BI) {
- MachineBasicBlock* MBB = BI->first;
- CSRegSet spilled = BI->second;
- CSRegSet restored;
-
- if (spilled.empty())
- continue;
-
- DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(spilled)
- << " RESTORE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
-
- if (CSRRestore[MBB].intersects(spilled)) {
- restored |= (CSRRestore[MBB] & spilled);
- }
-
- // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
- // we must find restores for all spills w/no intervening spills on all
- // paths from MBB to all return blocks.
- for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
- BE = df_end(MBB); BI != BE; ++BI) {
- MachineBasicBlock* SBB = *BI;
- if (SBB == MBB)
- continue;
- // Stop when we encounter spills of any CSRs spilled at MBB that
- // have not yet been seen to be restored.
- if (CSRSave[SBB].intersects(spilled) &&
- !restored.contains(CSRSave[SBB] & spilled))
- break;
- // Collect the CSRs spilled at MBB that are restored
- // at this DF successor of MBB.
- if (CSRRestore[SBB].intersects(spilled))
- restored |= (CSRRestore[SBB] & spilled);
-      // If we are at a return block, check that the restores
- // we have seen so far exhaust the spills at MBB, then
- // reset the restores.
- if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
- if (restored != spilled) {
- CSRegSet notRestored = (spilled - restored);
- DEBUG(dbgs() << MF->getName() << ": "
- << stringifyCSRegSet(notRestored)
- << " spilled at " << getBasicBlockName(MBB)
- << " are never restored on path to return "
- << getBasicBlockName(SBB) << "\n");
- }
- restored.clear();
- }
- }
- }
-
- // Check restore placements.
- for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
- BE = CSRRestore.end(); BI != BE; ++BI) {
- MachineBasicBlock* MBB = BI->first;
- CSRegSet restored = BI->second;
- CSRegSet spilled;
-
- if (restored.empty())
- continue;
-
- DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRSave[MBB])
- << " RESTORE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(restored) << "\n");
-
- if (CSRSave[MBB].intersects(restored)) {
- spilled |= (CSRSave[MBB] & restored);
- }
- // Walk inverse depth first from MBB to find spills of all
- // CSRs restored at MBB:
- for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
- BE = idf_end(MBB); BI != BE; ++BI) {
- MachineBasicBlock* PBB = *BI;
- if (PBB == MBB)
- continue;
- // Stop when we encounter restores of any CSRs restored at MBB that
- // have not yet been seen to be spilled.
- if (CSRRestore[PBB].intersects(restored) &&
- !spilled.contains(CSRRestore[PBB] & restored))
- break;
- // Collect the CSRs restored at MBB that are spilled
- // at this DF predecessor of MBB.
- if (CSRSave[PBB].intersects(restored))
- spilled |= (CSRSave[PBB] & restored);
- }
- if (spilled != restored) {
- CSRegSet notSpilled = (restored - spilled);
- DEBUG(dbgs() << MF->getName() << ": "
- << stringifyCSRegSet(notSpilled)
- << " restored at " << getBasicBlockName(MBB)
- << " are never spilled\n");
- }
- }
-}
-
-// Debugging print methods.
-std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
- if (!MBB)
- return "";
-
- if (MBB->getBasicBlock())
- return MBB->getBasicBlock()->getName().str();
-
- std::ostringstream name;
- name << "_MBB_" << MBB->getNumber();
- return name.str();
-}
-
-std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
- const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
- const std::vector<CalleeSavedInfo> CSI =
- MF->getFrameInfo()->getCalleeSavedInfo();
-
- std::ostringstream srep;
- if (CSI.size() == 0) {
- srep << "[]";
- return srep.str();
- }
- srep << "[";
- CSRegSet::iterator I = s.begin(), E = s.end();
- if (I != E) {
- unsigned reg = CSI[*I].getReg();
- srep << TRI->getName(reg);
- for (++I; I != E; ++I) {
- reg = CSI[*I].getReg();
- srep << ",";
- srep << TRI->getName(reg);
- }
- }
- srep << "]";
- return srep.str();
-}
-
-void PEI::dumpSet(const CSRegSet& s) {
- DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
-}
-
-void PEI::dumpUsed(MachineBasicBlock* MBB) {
- DEBUG({
- if (MBB)
- dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRUsed[MBB]) << "\n";
- });
-}
-
-void PEI::dumpAllUsed() {
- for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
- dumpUsed(MBB);
- }
-}
-
-void PEI::dumpSets(MachineBasicBlock* MBB) {
- DEBUG({
- if (MBB)
- dbgs() << getBasicBlockName(MBB) << " | "
- << stringifyCSRegSet(CSRUsed[MBB]) << " | "
- << stringifyCSRegSet(AnticIn[MBB]) << " | "
- << stringifyCSRegSet(AnticOut[MBB]) << " | "
- << stringifyCSRegSet(AvailIn[MBB]) << " | "
- << stringifyCSRegSet(AvailOut[MBB]) << "\n";
- });
-}
-
-void PEI::dumpSets1(MachineBasicBlock* MBB) {
- DEBUG({
- if (MBB)
- dbgs() << getBasicBlockName(MBB) << " | "
- << stringifyCSRegSet(CSRUsed[MBB]) << " | "
- << stringifyCSRegSet(AnticIn[MBB]) << " | "
- << stringifyCSRegSet(AnticOut[MBB]) << " | "
- << stringifyCSRegSet(AvailIn[MBB]) << " | "
- << stringifyCSRegSet(AvailOut[MBB]) << " | "
- << stringifyCSRegSet(CSRSave[MBB]) << " | "
- << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
- });
-}
-
-void PEI::dumpAllSets() {
- for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock* MBB = MBBI;
- dumpSets1(MBB);
- }
-}
-
-void PEI::dumpSRSets() {
- DEBUG({
- for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
- MBB != E; ++MBB) {
- if (!CSRSave[MBB].empty()) {
- dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRSave[MBB]);
- if (CSRRestore[MBB].empty())
- dbgs() << '\n';
- }
-
- if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
- dbgs() << " "
- << "RESTORE[" << getBasicBlockName(MBB) << "] = "
- << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
- }
- });
-}
-#endif
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 2fc8f46..da2e710 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -42,41 +42,40 @@ STATISTIC(NumInvokes, "Number of invokes replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
- class SjLjEHPrepare : public FunctionPass {
- const TargetMachine *TM;
- Type *FunctionContextTy;
- Constant *RegisterFn;
- Constant *UnregisterFn;
- Constant *BuiltinSetjmpFn;
- Constant *FrameAddrFn;
- Constant *StackAddrFn;
- Constant *StackRestoreFn;
- Constant *LSDAAddrFn;
- Value *PersonalityFn;
- Constant *CallSiteFn;
- Constant *FuncCtxFn;
- AllocaInst *FuncCtx;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPrepare(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM) { }
- bool doInitialization(Module &M);
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
- const char *getPassName() const {
- return "SJLJ Exception Handling preparation";
- }
+class SjLjEHPrepare : public FunctionPass {
+ const TargetMachine *TM;
+ Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *BuiltinSetjmpFn;
+ Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
+ Constant *LSDAAddrFn;
+ Value *PersonalityFn;
+ Constant *CallSiteFn;
+ Constant *FuncCtxFn;
+ AllocaInst *FuncCtx;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {}
+ bool doInitialization(Module &M);
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
+ const char *getPassName() const {
+ return "SJLJ Exception Handling preparation";
+ }
- private:
- bool setupEntryBlockAndCallSites(Function &F);
- void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
- Value *SelVal);
- Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
- void lowerIncomingArguments(Function &F);
- void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
- void insertCallSiteStore(Instruction *I, int Number);
- };
+private:
+ bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal);
+ Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads);
+ void lowerIncomingArguments(Function &F);
+ void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes);
+ void insertCallSiteStore(Instruction *I, int Number);
+};
} // end anonymous namespace
char SjLjEHPrepare::ID = 0;
@@ -92,23 +91,19 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
// builtin_setjmp uses a five word jbuf
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
Type *Int32Ty = Type::getInt32Ty(M.getContext());
- FunctionContextTy =
- StructType::get(VoidPtrTy, // __prev
- Int32Ty, // call_site
- ArrayType::get(Int32Ty, 4), // __data
- VoidPtrTy, // __personality
- VoidPtrTy, // __lsda
- ArrayType::get(VoidPtrTy, 5), // __jbuf
- NULL);
- RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
- Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy),
- (Type *)0);
- UnregisterFn =
- M.getOrInsertFunction("_Unwind_SjLj_Unregister",
- Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy),
- (Type *)0);
+ FunctionContextTy = StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ ArrayType::get(Int32Ty, 4), // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ ArrayType::get(VoidPtrTy, 5), // __jbuf
+ NULL);
+ RegisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy), (Type *)0);
+ UnregisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy), (Type *)0);
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
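The StructType assembled in this hunk describes the SjLj function-context record; a rough C++ equivalent is shown below for orientation. Field names follow the inline comments in the hunk; the struct itself is illustrative only, since the actual layout and padding are decided by the target DataLayout.

// Hypothetical C++ mirror of FunctionContextTy as built above.
struct SjLjFunctionContext {
  void *prev;           // __prev: link to the enclosing context
  unsigned call_site;   // call_site: index written by insertCallSiteStore
  unsigned data[4];     // __data: exception value and selector land here
  void *personality;    // __personality: personality routine
  void *lsda;           // __lsda: language-specific data area
  void *jbuf[5];        // __jbuf: five-word builtin_setjmp buffer
};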
@@ -134,16 +129,17 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
// Insert a store of the call-site number
- ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
- Number);
- Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/);
+ ConstantInt *CallSiteNoC =
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), Number);
+ Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/);
}
/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
/// we reach blocks we've already seen.
static void MarkBlocksLiveIn(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 64> &LiveBBs) {
- if (!LiveBBs.insert(BB)) return; // already been here.
+ SmallPtrSet<BasicBlock *, 64> &LiveBBs) {
+ if (!LiveBBs.insert(BB))
+ return; // already been here.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
MarkBlocksLiveIn(*PI, LiveBBs);
@@ -153,12 +149,14 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
/// instruction with those returned by the personality function.
void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
Value *SelVal) {
- SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+ SmallVector<Value *, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
while (!UseWorkList.empty()) {
Value *Val = UseWorkList.pop_back_val();
ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
- if (!EVI) continue;
- if (EVI->getNumIndices() != 1) continue;
+ if (!EVI)
+ continue;
+ if (EVI->getNumIndices() != 1)
+ continue;
if (*EVI->idx_begin() == 0)
EVI->replaceAllUsesWith(ExnVal);
else if (*EVI->idx_begin() == 1)
@@ -167,14 +165,15 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
EVI->eraseFromParent();
}
- if (LPI->getNumUses() == 0) return;
+ if (LPI->getNumUses() == 0)
+ return;
// There are still some uses of LPI. Construct an aggregate with the exception
// values and replace the LPI with that aggregate.
Type *LPadType = LPI->getType();
Value *LPadVal = UndefValue::get(LPadType);
- IRBuilder<>
- Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ IRBuilder<> Builder(
+ llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
@@ -183,8 +182,8 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
/// setupFunctionContext - Allocate the function context on the stack and fill
/// it with all of the data that we know at this point.
-Value *SjLjEHPrepare::
-setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
+Value *SjLjEHPrepare::setupFunctionContext(Function &F,
+ ArrayRef<LandingPadInst *> LPads) {
BasicBlock *EntryBB = F.begin();
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
@@ -192,9 +191,9 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
// because the value needs to be added to the global context list.
const TargetLowering *TLI = TM->getTargetLowering();
unsigned Align =
- TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
- FuncCtx =
- new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
+ TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
+ FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context",
+ EntryBB->begin());
// Fill in the function context structure.
for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
@@ -205,13 +204,13 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data");
// The exception values come back in context->__data[0].
- Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0,
- "exception_gep");
+ Value *ExceptionAddr =
+ Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep");
Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
- Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1,
- "exn_selector_gep");
+ Value *SelectorAddr =
+ Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep");
Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
substituteLPadValues(LPI, ExnVal, SelVal);
@@ -221,11 +220,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
IRBuilder<> Builder(EntryBB->getTerminator());
if (!PersonalityFn)
PersonalityFn = LPads[0]->getPersonalityFn();
- Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3,
- "pers_fn_gep");
- Builder.CreateStore(Builder.CreateBitCast(PersonalityFn,
- Builder.getInt8PtrTy()),
- PersonalityFieldPtr, /*isVolatile=*/true);
+ Value *PersonalityFieldPtr =
+ Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep");
+ Builder.CreateStore(
+ Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()),
+ PersonalityFieldPtr, /*isVolatile=*/true);
// LSDA address
Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr");
@@ -245,8 +244,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
++AfterAllocaInsPt;
- for (Function::arg_iterator
- AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
+ ++AI) {
Type *Ty = AI->getType();
// Aggregate types can't be cast, but are legal argument types, so we have
@@ -265,9 +264,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
// This is always a no-op cast because we're casting AI to AI->getType()
// so src and destination types are identical. BitCast is the only
// possibility.
- CastInst *NC =
- new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp",
- AfterAllocaInsPt);
+ CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp",
+ AfterAllocaInsPt);
AI->replaceAllUsesWith(NC);
// Set the operand of the cast instruction back to the AllocaInst.
@@ -284,20 +282,21 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
/// edge and spill them.
void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
- ArrayRef<InvokeInst*> Invokes) {
+ ArrayRef<InvokeInst *> Invokes) {
// Finally, scan the code looking for instructions with bad live ranges.
- for (Function::iterator
- BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
- for (BasicBlock::iterator
- II = BB->begin(), IIE = BB->end(); II != IIE; ++II) {
+ for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE;
+ ++II) {
// Ignore obvious cases we don't have to handle. In particular, most
// instructions either have no uses or only have a single use inside the
// current block. Ignore them quickly.
Instruction *Inst = II;
- if (Inst->use_empty()) continue;
+ if (Inst->use_empty())
+ continue;
if (Inst->hasOneUse() &&
cast<Instruction>(Inst->use_back())->getParent() == BB &&
- !isa<PHINode>(Inst->use_back())) continue;
+ !isa<PHINode>(Inst->use_back()))
+ continue;
// If this is an alloca in the entry block, it's not a real register
// value.
@@ -306,16 +305,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
continue;
// Avoid iterator invalidation by copying users to a temporary vector.
- SmallVector<Instruction*, 16> Users;
- for (Value::use_iterator
- UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) {
+ SmallVector<Instruction *, 16> Users;
+ for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+ UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
if (User->getParent() != BB || isa<PHINode>(User))
Users.push_back(User);
}
// Find all of the blocks that this value is live in.
- SmallPtrSet<BasicBlock*, 64> LiveBBs;
+ SmallPtrSet<BasicBlock *, 64> LiveBBs;
LiveBBs.insert(Inst->getParent());
while (!Users.empty()) {
Instruction *U = Users.back();
@@ -339,7 +338,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
- << UnwindBlock->getName() << "\n");
+ << UnwindBlock->getName() << "\n");
NeedsSpill = true;
break;
}
@@ -362,15 +361,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
// Place PHIs into a set to avoid invalidating the iterator.
- SmallPtrSet<PHINode*, 8> PHIsToDemote;
- for (BasicBlock::iterator
- PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+ SmallPtrSet<PHINode *, 8> PHIsToDemote;
+ for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
PHIsToDemote.insert(cast<PHINode>(PN));
- if (PHIsToDemote.empty()) continue;
+ if (PHIsToDemote.empty())
+ continue;
// Demote the PHIs to the stack.
- for (SmallPtrSet<PHINode*, 8>::iterator
- I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I)
+ for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(),
+ E = PHIsToDemote.end();
+ I != E; ++I)
DemotePHIToStack(*I);
// Move the landingpad instruction back to the top of the landing pad block.
@@ -382,9 +382,9 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
- SmallVector<ReturnInst*, 16> Returns;
- SmallVector<InvokeInst*, 16> Invokes;
- SmallSetVector<LandingPadInst*, 16> LPads;
+ SmallVector<ReturnInst *, 16> Returns;
+ SmallVector<InvokeInst *, 16> Invokes;
+ SmallSetVector<LandingPadInst *, 16> LPads;
// Look through the terminators of the basic blocks to find invokes.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -404,7 +404,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Returns.push_back(RI);
}
- if (Invokes.empty()) return false;
+ if (Invokes.empty())
+ return false;
NumInvokes += Invokes.size();
@@ -412,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
lowerAcrossUnwindEdges(F, Invokes);
Value *FuncCtx =
- setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
+ setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
BasicBlock *EntryBB = F.begin();
IRBuilder<> Builder(EntryBB->getTerminator());
@@ -446,7 +447,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
insertCallSiteStore(Invokes[I], I + 1);
ConstantInt *CallSiteNum =
- ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
// Record the call site value for the back end so it stays associated with
// the invoke.
@@ -468,8 +469,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
}
// Register the function context and make sure it's known to not throw
- CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "",
- EntryBB->getTerminator());
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
Register->setDoesNotThrow();
// Following any allocas not in the entry block, update the saved SP in the
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 209792f..d5b3a4a 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -77,7 +77,7 @@ protected:
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
- assert(li->weight != HUGE_VALF &&
+ assert(li->weight != llvm::huge_valf &&
"Attempting to spill already spilled value.");
assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
@@ -115,15 +115,14 @@ protected:
indices.push_back(i);
}
- // Create a new vreg & interval for this instr.
- LiveInterval *newLI = &LRE.create();
- newLI->weight = HUGE_VALF;
+ // Create a new virtual register for the load and/or store.
+ unsigned NewVReg = LRE.create();
// Update the reg operands & kill flags.
for (unsigned i = 0; i < indices.size(); ++i) {
unsigned mopIdx = indices[i];
MachineOperand &mop = mi->getOperand(mopIdx);
- mop.setReg(newLI->reg);
+ mop.setReg(NewVReg);
if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
mop.setIsKill(true);
}
@@ -133,28 +132,20 @@ protected:
// Insert reload if necessary.
MachineBasicBlock::iterator miItr(mi);
if (hasUse) {
- tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc,
+ MachineInstrSpan MIS(miItr);
+
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc,
tri);
- MachineInstr *loadInstr(prior(miItr));
- SlotIndex loadIndex =
- lis->InsertMachineInstrInMaps(loadInstr).getRegSlot();
- SlotIndex endIndex = loadIndex.getNextIndex();
- VNInfo *loadVNI =
- newLI->getNextValue(loadIndex, lis->getVNInfoAllocator());
- newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+ lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr);
}
// Insert store if necessary.
if (hasDef) {
- tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg,
+ MachineInstrSpan MIS(miItr);
+
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), NewVReg,
true, ss, trc, tri);
- MachineInstr *storeInstr(llvm::next(miItr));
- SlotIndex storeIndex =
- lis->InsertMachineInstrInMaps(storeInstr).getRegSlot();
- SlotIndex beginIndex = storeIndex.getPrevIndex();
- VNInfo *storeVNI =
- newLI->getNextValue(beginIndex, lis->getVNInfoAllocator());
- newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+ lis->InsertMachineInstrRangeInMaps(llvm::next(miItr), MIS.end());
}
}
}
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index e717fac..68a15f7 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -214,7 +214,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
// When not live in, the first use should be a def.
if (!BI.LiveIn) {
- assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ assert(LVI->start == LVI->valno->def && "Dangling Segment start");
assert(LVI->start == BI.FirstInstr && "First instr should be a def");
BI.FirstDef = BI.FirstInstr;
}
@@ -245,8 +245,8 @@ bool SplitAnalysis::calcLiveBlockInfo() {
BI.FirstInstr = BI.FirstDef = LVI->start;
}
- // A LiveRange that starts in the middle of the block must be a def.
- assert(LVI->start == LVI->valno->def && "Dangling LiveRange start");
+ // A Segment that starts in the middle of the block must be a def.
+ assert(LVI->start == LVI->valno->def && "Dangling Segment start");
if (!BI.FirstDef)
BI.FirstDef = LVI->start;
}
@@ -377,7 +377,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
assert(ParentVNI && "Mapping NULL value");
assert(Idx.isValid() && "Invalid SlotIndex");
assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
- LiveInterval *LI = Edit->get(RegIdx);
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
// Create a new value.
VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
@@ -395,14 +395,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
// If the previous value was a simple mapping, add liveness for it now.
if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
SlotIndex Def = OldVNI->def;
- LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI));
+ LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI));
// No longer a simple mapping. Switch to a complex, non-forced mapping.
InsP.first->second = ValueForcePair();
}
// This is a complex mapping, add liveness for VNI
SlotIndex Def = VNI->def;
- LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
+ LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
return VNI;
}
@@ -422,7 +422,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
// This was previously a single mapping. Make sure the old def is represented
// by a trivial live range.
SlotIndex Def = VNI->def;
- Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
+ LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
// Mark as complex mapped, forced.
VFP = ValueForcePair(0, true);
}
@@ -434,7 +435,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
MachineBasicBlock::iterator I) {
MachineInstr *CopyMI = 0;
SlotIndex Def;
- LiveInterval *LI = Edit->get(RegIdx);
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
// We may be trying to avoid interference that ends at a deleted instruction,
// so always begin RegIdx 0 early and all others late.
@@ -462,11 +463,11 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
unsigned SplitEditor::openIntv() {
// Create the complement as index 0.
if (Edit->empty())
- Edit->create();
+ Edit->createEmptyInterval();
// Create the open interval.
OpenIdx = Edit->size();
- Edit->create();
+ Edit->createEmptyInterval();
return OpenIdx;
}
@@ -631,7 +632,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
//===----------------------------------------------------------------------===//
void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
- LiveInterval *LI = Edit->get(0);
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
RegAssignMap::iterator AssignI;
AssignI.setMap(RegAssign);
@@ -730,7 +731,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
void SplitEditor::hoistCopiesForSize() {
// Get the complement interval, always RegIdx 0.
- LiveInterval *LI = Edit->get(0);
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
LiveInterval *Parent = &Edit->getParent();
// Track the nearest common dominator for all back-copies for each ParentVNI,
@@ -861,13 +862,13 @@ bool SplitEditor::transferValues() {
// The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
- LiveInterval *LI = Edit->get(RegIdx);
+ LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
// Check for a simply defined value that can be blitted directly.
ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
if (VNInfo *VNI = VFP.getPointer()) {
DEBUG(dbgs() << ':' << VNI->id);
- LI->addRange(LiveRange(Start, End, VNI));
+ LR.addSegment(LiveInterval::Segment(Start, End, VNI));
Start = End;
continue;
}
@@ -891,7 +892,7 @@ bool SplitEditor::transferValues() {
// The first block may be live-in, or it may have its own def.
if (Start != BlockStart) {
- VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+ VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped value");
DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
// MBB has its own def. Is it also live-out?
@@ -911,7 +912,7 @@ bool SplitEditor::transferValues() {
if (BlockStart == ParentVNI->def) {
// This block has the def of a parent PHI, so it isn't live-in.
assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
- VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End));
+ VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped parent PHI");
if (End >= BlockEnd)
LRC.setLiveOutValue(MBB, VNI); // Live-out as well.
@@ -919,10 +920,10 @@ bool SplitEditor::transferValues() {
// This block needs a live-in value. The last block covered may not
// be live-out.
if (End < BlockEnd)
- LRC.addLiveInBlock(LI, MDT[MBB], End);
+ LRC.addLiveInBlock(LR, MDT[MBB], End);
else {
// Live-through, and we don't know the value.
- LRC.addLiveInBlock(LI, MDT[MBB]);
+ LRC.addLiveInBlock(LR, MDT[MBB]);
LRC.setLiveOutValue(MBB, 0);
}
}
@@ -949,7 +950,7 @@ void SplitEditor::extendPHIKillRanges() {
if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
continue;
unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
- LiveInterval *LI = Edit->get(RegIdx);
+ LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
LiveRangeCalc &LRC = getLRCalc(RegIdx);
MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
@@ -961,7 +962,7 @@ void SplitEditor::extendPHIKillRanges() {
if (Edit->getParent().liveAt(LastUse)) {
assert(RegAssign.lookup(LastUse) == RegIdx &&
"Different register assignment in phi predecessor");
- LRC.extend(LI, End);
+ LRC.extend(LR, End);
}
}
}
@@ -990,7 +991,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
- LiveInterval *LI = Edit->get(RegIdx);
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
MO.setReg(LI->reg);
DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
<< Idx << ':' << RegIdx << '\t' << *MI);
@@ -1011,14 +1012,14 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
} else
Idx = Idx.getRegSlot(true);
- getLRCalc(RegIdx).extend(LI, Idx.getNextSlot());
+ getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot());
}
}
void SplitEditor::deleteRematVictims() {
SmallVector<MachineInstr*, 8> Dead;
for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
- LiveInterval *LI = *I;
+ LiveInterval *LI = &LIS.getInterval(*I);
for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
LII != LIE; ++LII) {
// Dead defs end at the dead slot.
@@ -1091,8 +1092,10 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
deleteRematVictims();
// Get rid of unused values and set phi-kill flags.
- for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
- (*I)->RenumberValues(LIS);
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) {
+ LiveInterval &LI = LIS.getInterval(*I);
+ LI.RenumberValues();
+ }
// Provide a reverse mapping from original indices to Edit ranges.
if (LRMap) {
@@ -1105,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
ConnectedVNInfoEqClasses ConEQ(LIS);
for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
// Don't use iterators, they are invalidated by create() below.
- LiveInterval *li = Edit->get(i);
+ LiveInterval *li = &LIS.getInterval(Edit->get(i));
unsigned NumComp = ConEQ.Classify(li);
if (NumComp <= 1)
continue;
@@ -1113,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
SmallVector<LiveInterval*, 8> dups;
dups.push_back(li);
for (unsigned j = 1; j != NumComp; ++j)
- dups.push_back(&Edit->create());
+ dups.push_back(&Edit->createEmptyInterval());
ConEQ.Distribute(&dups[0], MRI);
// The new intervals all map back to i.
if (LRMap)
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index faaa6e7..3dbc050 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -170,7 +170,7 @@ private:
/// slots to use the joint slots.
void remapInstructions(DenseMap<int, int> &SlotRemap);
- /// The input program may contain intructions which are not inside lifetime
+ /// The input program may contain instructions which are not inside lifetime
/// markers. This can happen due to a bug in the compiler or due to a bug in
/// user code (for example, returning a reference to a local variable).
/// This procedure checks all of the instructions in the function and
@@ -450,14 +450,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
SlotIndex F = Finishes[i];
if (S < F) {
// We have a single consecutive region.
- Intervals[i]->addRange(LiveRange(S, F, ValNum));
+ Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
} else {
// We have two non consecutive regions. This happens when
// LIFETIME_START appears after the LIFETIME_END marker.
SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
- Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
- Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
+ Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
+ Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
}
}
}
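The non-consecutive case above arises when a slot's LIFETIME_START is seen after its LIFETIME_END within a block, so the live range wraps around the block boundaries. A small sketch of that interval arithmetic, with plain integers standing in for SlotIndex and a hypothetical helper name (not part of the pass):

#include <utility>
#include <vector>

// Given a block [BlockStart, BlockEnd) and the slot indices S (start marker)
// and F (end marker), return the segments over which the stack slot is live.
std::vector<std::pair<int, int>> liveSegments(int BlockStart, int BlockEnd,
                                              int S, int F) {
  if (S < F)
    return {{S, F}};                       // single consecutive region
  // Start marker appears after the end marker: live from block entry to F
  // and from S to block exit.
  return {{BlockStart, F}, {S, BlockEnd}};
}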
@@ -763,7 +763,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Merge disjoint slots.
if (!First->overlaps(*Second)) {
Changed = true;
- First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+ First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
new file mode 100644
index 0000000..40893ea
--- /dev/null
+++ b/lib/CodeGen/StackMaps.cpp
@@ -0,0 +1,314 @@
+//===---------------------------- StackMaps.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackmaps"
+
+#include "llvm/CodeGen/StackMaps.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <iterator>
+
+using namespace llvm;
+
+PatchPointOpers::PatchPointOpers(const MachineInstr *MI):
+ MI(MI),
+ HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ !MI->getOperand(0).isImplicit()),
+ IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) {
+
+#ifndef NDEBUG
+ {
+ unsigned CheckStartIdx = 0, e = MI->getNumOperands();
+ while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
+ MI->getOperand(CheckStartIdx).isDef() &&
+ !MI->getOperand(CheckStartIdx).isImplicit())
+ ++CheckStartIdx;
+
+ assert(getMetaIdx() == CheckStartIdx &&
+           "Unexpected additional definition in Patchpoint intrinsic.");
+ }
+#endif
+}
+
+unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
+ if (!StartIdx)
+ StartIdx = getVarIdx();
+
+ // Find the next scratch register (implicit def and early clobber)
+ unsigned ScratchIdx = StartIdx, e = MI->getNumOperands();
+ while (ScratchIdx < e &&
+ !(MI->getOperand(ScratchIdx).isReg() &&
+ MI->getOperand(ScratchIdx).isDef() &&
+ MI->getOperand(ScratchIdx).isImplicit() &&
+ MI->getOperand(ScratchIdx).isEarlyClobber()))
+ ++ScratchIdx;
+
+ assert(ScratchIdx != e && "No scratch register available");
+ return ScratchIdx;
+}
+
+void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ bool recordResult) {
+
+ MCContext &OutContext = AP.OutStreamer.getContext();
+ MCSymbol *MILabel = OutContext.CreateTempSymbol();
+ AP.OutStreamer.EmitLabel(MILabel);
+
+ LocationVec CallsiteLocs;
+
+ if (recordResult) {
+ std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
+ OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM);
+
+ Location &Loc = ParseResult.first;
+ assert(Loc.LocType == Location::Register &&
+ "Stackmap return location must be a register.");
+ CallsiteLocs.push_back(Loc);
+ }
+
+ while (MOI != MOE) {
+ std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
+ OpParser(MOI, MOE, AP.TM);
+
+ Location &Loc = ParseResult.first;
+
+ // Move large constants into the constant pool.
+ if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) {
+ Loc.LocType = Location::ConstantIndex;
+ Loc.Offset = ConstPool.getConstantIndex(Loc.Offset);
+ }
+
+ CallsiteLocs.push_back(Loc);
+ MOI = ParseResult.second;
+ }
+
+ const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub(
+ MCSymbolRefExpr::Create(MILabel, OutContext),
+ MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext),
+ OutContext);
+
+ CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs));
+}
+
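The large-constant test in recordStackMapOpers keys off the upper 32 bits of the operand: only values whose high bits are all zero stay inline in the 32-bit Offset field, and everything else (including small negative values) is routed through the constant pool. A standalone restatement of the check, with a hypothetical helper name:

#include <cstdint>

// True if the constant can be encoded inline in the 32-bit Offset field;
// any value with non-zero upper bits goes to the constant pool instead.
bool fitsInlineConstant(int64_t V) {
  return (static_cast<uint64_t>(V) & ~0xFFFFFFFFULL) == 0;
}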
+static MachineInstr::const_mop_iterator
+getStackMapEndMOP(MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE) {
+ for (; MOI != MOE; ++MOI)
+ if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit()))
+ break;
+
+ return MOI;
+}
+
+void StackMaps::recordStackMap(const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
+
+ int64_t ID = MI.getOperand(0).getImm();
+ assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
+ recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2),
+ getStackMapEndMOP(MI.operands_begin(),
+ MI.operands_end()));
+}
+
+void StackMaps::recordPatchPoint(const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
+
+ PatchPointOpers opers(&MI);
+ int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm();
+ assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
+ MachineInstr::const_mop_iterator MOI =
+ llvm::next(MI.operands_begin(), opers.getStackMapStartIdx());
+ recordStackMapOpers(MI, ID, MOI, getStackMapEndMOP(MOI, MI.operands_end()),
+ opers.isAnyReg() && opers.hasDef());
+
+#ifndef NDEBUG
+ // verify anyregcc
+ LocationVec &Locations = CSInfos.back().Locations;
+ if (opers.isAnyReg()) {
+ unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm();
+ for (unsigned i = 0, e = (opers.hasDef() ? NArgs+1 : NArgs); i != e; ++i)
+ assert(Locations[i].LocType == Location::Register &&
+ "anyreg arg must be in reg.");
+ }
+#endif
+}
+
+/// serializeToStackMapSection conceptually populates the following fields:
+///
+/// uint32 : Reserved (header)
+/// uint32 : NumConstants
+/// int64 : Constants[NumConstants]
+/// uint32 : NumRecords
+/// StkMapRecord[NumRecords] {
+/// uint32 : PatchPoint ID
+/// uint32 : Instruction Offset
+/// uint16 : Reserved (record flags)
+/// uint16 : NumLocations
+/// Location[NumLocations] {
+/// uint8 : Register | Direct | Indirect | Constant | ConstantIndex
+/// uint8 : Size in Bytes
+/// uint16 : Dwarf RegNum
+/// int32 : Offset
+/// }
+/// }
+///
+/// Location Encoding, Type, Value:
+/// 0x1, Register, Reg (value in register)
+/// 0x2, Direct, Reg + Offset (frame index)
+/// 0x3, Indirect, [Reg + Offset] (spilled value)
+/// 0x4, Constant, Offset (small constant)
+/// 0x5, ConstIndex, Constants[Offset] (large constant)
+///
+void StackMaps::serializeToStackMapSection() {
+ // Bail out if there's no stack map data.
+ if (CSInfos.empty())
+ return;
+
+ MCContext &OutContext = AP.OutStreamer.getContext();
+ const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+
+ // Create the section.
+ const MCSection *StackMapSection =
+ OutContext.getObjectFileInfo()->getStackMapSection();
+ AP.OutStreamer.SwitchSection(StackMapSection);
+
+ // Emit a dummy symbol to force section inclusion.
+ AP.OutStreamer.EmitLabel(
+ OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps")));
+
+ // Serialize data.
+ const char *WSMP = "Stack Maps: ";
+ (void)WSMP;
+ const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo();
+
+ DEBUG(dbgs() << "********** Stack Map Output **********\n");
+
+ // Header.
+ AP.OutStreamer.EmitIntValue(0, 4);
+
+ // Num constants.
+ AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4);
+
+ // Constant pool entries.
+ for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i)
+ AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8);
+
+ DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << "\n");
+ AP.OutStreamer.EmitIntValue(CSInfos.size(), 4);
+
+ for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(),
+ CSIE = CSInfos.end();
+ CSII != CSIE; ++CSII) {
+
+ unsigned CallsiteID = CSII->ID;
+ const LocationVec &CSLocs = CSII->Locations;
+
+ DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n");
+
+ // Verify stack map entry. It's better to communicate a problem to the
+ // runtime than crash in case of in-process compilation. Currently, we do
+ // simple overflow checks, but we may eventually communicate other
+ // compilation errors this way.
+ if (CSLocs.size() > UINT16_MAX) {
+ AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID.
+ AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4);
+ AP.OutStreamer.EmitIntValue(0, 2); // Reserved.
+ AP.OutStreamer.EmitIntValue(0, 2); // 0 locations.
+ continue;
+ }
+
+ AP.OutStreamer.EmitIntValue(CallsiteID, 4);
+ AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4);
+
+ // Reserved for flags.
+ AP.OutStreamer.EmitIntValue(0, 2);
+
+ DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n");
+
+ AP.OutStreamer.EmitIntValue(CSLocs.size(), 2);
+
+ unsigned operIdx = 0;
+ for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end();
+ LocI != LocE; ++LocI, ++operIdx) {
+ const Location &Loc = *LocI;
+ DEBUG(
+ dbgs() << WSMP << " Loc " << operIdx << ": ";
+ switch (Loc.LocType) {
+ case Location::Unprocessed:
+ dbgs() << "<Unprocessed operand>";
+ break;
+ case Location::Register:
+ dbgs() << "Register " << MCRI.getName(Loc.Reg);
+ break;
+ case Location::Direct:
+ dbgs() << "Direct " << MCRI.getName(Loc.Reg);
+ if (Loc.Offset)
+ dbgs() << " + " << Loc.Offset;
+ break;
+ case Location::Indirect:
+ dbgs() << "Indirect " << MCRI.getName(Loc.Reg)
+ << " + " << Loc.Offset;
+ break;
+ case Location::Constant:
+ dbgs() << "Constant " << Loc.Offset;
+ break;
+ case Location::ConstantIndex:
+ dbgs() << "Constant Index " << Loc.Offset;
+ break;
+ }
+ dbgs() << "\n";
+ );
+
+ unsigned RegNo = 0;
+ int Offset = Loc.Offset;
+      if (Loc.Reg) {
+ RegNo = MCRI.getDwarfRegNum(Loc.Reg, false);
+ for (MCSuperRegIterator SR(Loc.Reg, TRI);
+ SR.isValid() && (int)RegNo < 0; ++SR) {
+ RegNo = TRI->getDwarfRegNum(*SR, false);
+ }
+ // If this is a register location, put the subregister byte offset in
+ // the location offset.
+ if (Loc.LocType == Location::Register) {
+ assert(!Loc.Offset && "Register location should have zero offset");
+ unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false);
+ unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg);
+ if (SubRegIdx)
+ Offset = MCRI.getSubRegIdxOffset(SubRegIdx);
+ }
+ }
+ else {
+ assert(Loc.LocType != Location::Register &&
+ "Missing location register");
+ }
+ AP.OutStreamer.EmitIntValue(Loc.LocType, 1);
+ AP.OutStreamer.EmitIntValue(Loc.Size, 1);
+ AP.OutStreamer.EmitIntValue(RegNo, 2);
+ AP.OutStreamer.EmitIntValue(Offset, 4);
+ }
+ }
+
+ AP.OutStreamer.AddBlankLine();
+
+ CSInfos.clear();
+}
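Read back from the consumer's side, the byte stream emitted above corresponds to the layout documented at the top of serializeToStackMapSection. The structs below are a hypothetical host-side mirror for orientation only; the section is written as raw integers in target byte order, not as these structs.

#include <cstdint>
#include <vector>

struct StackMapLocation {
  uint8_t Type;          // 0x1 Register, 0x2 Direct, 0x3 Indirect,
                         // 0x4 Constant, 0x5 ConstantIndex
  uint8_t Size;          // size in bytes
  uint16_t DwarfRegNum;  // Dwarf register number (0 for constants)
  int32_t Offset;        // offset, small constant, or constant-pool index
};

struct StackMapRecord {
  uint32_t PatchPointID;
  uint32_t InstructionOffset;              // label offset from the function symbol
  uint16_t Reserved;                       // record flags
  std::vector<StackMapLocation> Locations; // NumLocations entries
};

struct StackMapSection {
  uint32_t Reserved;                    // header
  std::vector<int64_t> Constants;       // NumConstants entries
  std::vector<StackMapRecord> Records;  // NumRecords entries
};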
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 4c56380..9020449 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -15,11 +15,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -27,12 +29,12 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
#include <cstdlib>
using namespace llvm;
@@ -40,137 +42,93 @@ STATISTIC(NumFunProtected, "Number of functions protected");
STATISTIC(NumAddrTaken, "Number of local variables that have their address"
" taken.");
-namespace {
- class StackProtector : public FunctionPass {
- const TargetMachine *TM;
-
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// target type sizes.
- const TargetLoweringBase *TLI;
- const Triple Trip;
-
- Function *F;
- Module *M;
-
- DominatorTree *DT;
-
- /// \brief The minimum size of buffers that will receive stack smashing
- /// protection when -fstack-protection is used.
- unsigned SSPBufferSize;
-
- /// VisitedPHIs - The set of PHI nodes visited when determining
- /// if a variable's reference has been taken. This set
- /// is maintained to ensure we don't visit the same PHI node multiple
- /// times.
- SmallPtrSet<const PHINode*, 16> VisitedPHIs;
-
- /// InsertStackProtectors - Insert code into the prologue and epilogue of
- /// the function.
- ///
- /// - The prologue code loads and stores the stack guard onto the stack.
- /// - The epilogue checks the value stored in the prologue against the
- /// original value. It calls __stack_chk_fail if they differ.
- bool InsertStackProtectors();
-
- /// CreateFailBB - Create a basic block to jump to when the stack protector
- /// check fails.
- BasicBlock *CreateFailBB();
-
- /// ContainsProtectableArray - Check whether the type either is an array or
- /// contains an array of sufficient size so that we need stack protectors
- /// for it.
- bool ContainsProtectableArray(Type *Ty, bool Strong = false,
- bool InStruct = false) const;
-
- /// \brief Check whether a stack allocation has its address taken.
- bool HasAddressTaken(const Instruction *AI);
-
- /// RequiresStackProtector - Check whether or not this function needs a
- /// stack protector based upon the stack protector level.
- bool RequiresStackProtector();
- public:
- static char ID; // Pass identification, replacement for typeid.
- StackProtector() : FunctionPass(ID), TM(0), TLI(0), SSPBufferSize(0) {
- initializeStackProtectorPass(*PassRegistry::getPassRegistry());
- }
- StackProtector(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), TLI(0), Trip(TM->getTargetTriple()),
- SSPBufferSize(8) {
- initializeStackProtectorPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
- }
-
- virtual bool runOnFunction(Function &Fn);
- };
-} // end anonymous namespace
+static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
+ cl::init(true), cl::Hidden);
char StackProtector::ID = 0;
-INITIALIZE_PASS(StackProtector, "stack-protector",
- "Insert stack protectors", false, false)
+INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors",
+ false, true)
FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
return new StackProtector(TM);
}
+StackProtector::SSPLayoutKind
+StackProtector::getSSPLayout(const AllocaInst *AI) const {
+ return AI ? Layout.lookup(AI) : SSPLK_None;
+}
+
bool StackProtector::runOnFunction(Function &Fn) {
F = &Fn;
M = F->getParent();
DT = getAnalysisIfAvailable<DominatorTree>();
TLI = TM->getTargetLowering();
- if (!RequiresStackProtector()) return false;
+ if (!RequiresStackProtector())
+ return false;
- Attribute Attr =
- Fn.getAttributes().getAttribute(AttributeSet::FunctionIndex,
- "stack-protector-buffer-size");
+ Attribute Attr = Fn.getAttributes().getAttribute(
+ AttributeSet::FunctionIndex, "stack-protector-buffer-size");
if (Attr.isStringAttribute())
- SSPBufferSize = atoi(Attr.getValueAsString().data());
+ Attr.getValueAsString().getAsInteger(10, SSPBufferSize);
++NumFunProtected;
return InsertStackProtectors();
}
-/// ContainsProtectableArray - Check whether the type either is an array or
-/// contains a char array of sufficient size so that we need stack protectors
-/// for it.
-bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong,
+/// \param [out] IsLarge is set to true if a protectable array is found and
+/// it is "large" ( >= ssp-buffer-size). In the case of a structure with
+/// multiple arrays, this gets set if any of them is large.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
+ bool Strong,
bool InStruct) const {
- if (!Ty) return false;
+ if (!Ty)
+ return false;
if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- // In strong mode any array, regardless of type and size, triggers a
- // protector
- if (Strong)
- return true;
if (!AT->getElementType()->isIntegerTy(8)) {
// If we're on a non-Darwin platform or we're inside of a structure, don't
// add stack protectors unless the array is a character array.
- if (InStruct || !Trip.isOSDarwin())
- return false;
+ // However, in strong mode any array, regardless of type and size,
+ // triggers a protector.
+ if (!Strong && (InStruct || !Trip.isOSDarwin()))
+ return false;
}
// If an array has more than SSPBufferSize bytes of allocated space, then we
// emit stack protectors.
- if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT))
+ if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) {
+ IsLarge = true;
+ return true;
+ }
+
+ if (Strong)
+ // Require a protector for all arrays in strong mode
return true;
}
const StructType *ST = dyn_cast<StructType>(Ty);
- if (!ST) return false;
+ if (!ST)
+ return false;
+ bool NeedsProtector = false;
for (StructType::element_iterator I = ST->element_begin(),
- E = ST->element_end(); I != E; ++I)
- if (ContainsProtectableArray(*I, Strong, true))
- return true;
+ E = ST->element_end();
+ I != E; ++I)
+ if (ContainsProtectableArray(*I, IsLarge, Strong, true)) {
+ // If the element is a protectable array and is large (>= SSPBufferSize)
+ // then we are done. If the protectable array is not large, then
+ // keep looking in case a subsequent element is a large array.
+ if (IsLarge)
+ return true;
+ NeedsProtector = true;
+ }
- return false;
+ return NeedsProtector;
}
bool StackProtector::HasAddressTaken(const Instruction *AI) {
for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) {
+ UI != UE; ++UI) {
const User *U = *UI;
if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (AI == SI->getValueOperand())
@@ -217,11 +175,13 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
/// address taken.
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
+ bool NeedsProtector = false;
if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq))
- return true;
- else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong))
+ Attribute::StackProtectReq)) {
+ NeedsProtector = true;
+ Strong = true; // Use the same heuristic as strong to determine SSPLayout
+ } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
Strong = true;
else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::StackProtect))
@@ -230,39 +190,116 @@ bool StackProtector::RequiresStackProtector() {
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
BasicBlock *BB = I;
- for (BasicBlock::iterator
- II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (AI->isArrayAllocation()) {
// SSP-Strong: Enable protectors for any call to alloca, regardless
// of size.
if (Strong)
return true;
-
+
if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(AI->getArraySize())) {
- if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize)
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
- return true;
+ Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ NeedsProtector = true;
+ } else if (Strong) {
+ // Require protectors for all alloca calls in strong mode.
+ Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
+ NeedsProtector = true;
+ }
} else {
// A call to alloca with a variable size requires protectors.
- return true;
+ Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ NeedsProtector = true;
}
+ continue;
}
- if (ContainsProtectableArray(AI->getAllocatedType(), Strong))
- return true;
+ bool IsLarge = false;
+ if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
+ Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
+ : SSPLK_SmallArray));
+ NeedsProtector = true;
+ continue;
+ }
if (Strong && HasAddressTaken(AI)) {
- ++NumAddrTaken;
- return true;
+ ++NumAddrTaken;
+ Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
+ NeedsProtector = true;
}
}
}
}
- return false;
+ return NeedsProtector;
+}
+
+static bool InstructionWillNotHaveChain(const Instruction *I) {
+ return !I->mayHaveSideEffects() && !I->mayReadFromMemory() &&
+ isSafeToSpeculativelyExecute(I);
+}
+
+/// Identify if RI has a previous instruction in the "Tail Position" and return
+/// it. Otherwise return 0.
+///
+/// This is based on the code in llvm::isInTailCallPosition. The difference
+/// is that it inverts the first part of llvm::isInTailCallPosition since
+/// isInTailCallPosition is checking if a call is in a tail call position, and
+/// we are searching for an unknown tail call that might be in the tail call
+/// position. Once we find the call though, the code uses the same refactored
+/// code, returnTypeIsEligibleForTailCall.
+static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI,
+ const TargetLoweringBase *TLI) {
+  // Establish a reasonable upper bound on the maximum number of instructions
+  // we will look through to find a tail call.
+ unsigned SearchCounter = 0;
+ const unsigned MaxSearch = 4;
+ bool NoInterposingChain = true;
+
+ for (BasicBlock::reverse_iterator I = llvm::next(BB->rbegin()),
+ E = BB->rend();
+ I != E && SearchCounter < MaxSearch; ++I) {
+ Instruction *Inst = &*I;
+
+ // Skip over debug intrinsics and do not allow them to affect our MaxSearch
+ // counter.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+    // If we find a call and the following conditions are satisfied, then we
+ // have found a tail call that satisfies at least the target independent
+ // requirements of a tail call:
+ //
+ // 1. The call site has the tail marker.
+ //
+    // 2. The call site either will not cause the creation of a chain or, if a
+    // chain is necessary, there are no instructions between the call site and
+    // the return which would create an interposing chain.
+ //
+ // 3. The return type of the function does not impede tail call
+ // optimization.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
+ if (CI->isTailCall() &&
+ (InstructionWillNotHaveChain(CI) || NoInterposingChain) &&
+ returnTypeIsEligibleForTailCall(BB->getParent(), CI, RI, *TLI))
+ return CI;
+ }
+
+    // If we did not find a call, see if we have an instruction that may create
+ // an interposing chain.
+ NoInterposingChain =
+ NoInterposingChain && InstructionWillNotHaveChain(Inst);
+
+    // Increment the search counter.
+ SearchCounter++;
+ }
+
+ return 0;
}
/// Insert code into the entry block that stores the __stack_chk_guard
@@ -273,36 +310,36 @@ bool StackProtector::RequiresStackProtector() {
/// StackGuard = load __stack_chk_guard
/// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
///
-static void CreatePrologue(Function *F, Module *M, ReturnInst *RI,
+/// Returns true if the platform/triple supports the stackprotectorcreate pseudo
+/// node.
+static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
const TargetLoweringBase *TLI, const Triple &Trip,
AllocaInst *&AI, Value *&StackGuardVar) {
+ bool SupportsSelectionDAGSP = false;
PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
unsigned AddressSpace, Offset;
if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
Constant *OffsetVal =
- ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
-
- StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
- PointerType::get(PtrTy,
- AddressSpace));
+ ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+ StackGuardVar = ConstantExpr::getIntToPtr(
+ OffsetVal, PointerType::get(PtrTy, AddressSpace));
} else if (Trip.getOS() == llvm::Triple::OpenBSD) {
StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy);
cast<GlobalValue>(StackGuardVar)
- ->setVisibility(GlobalValue::HiddenVisibility);
+ ->setVisibility(GlobalValue::HiddenVisibility);
} else {
+ SupportsSelectionDAGSP = true;
StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
}
-
- BasicBlock &Entry = F->getEntryBlock();
- Instruction *InsPt = &Entry.front();
-
- AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
- LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
-
- Value *Args[] = { LI, AI };
- CallInst::
- Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
- Args, "", InsPt);
+
+ IRBuilder<> B(&F->getEntryBlock().front());
+ AI = B.CreateAlloca(PtrTy, 0, "StackGuardSlot");
+ LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard");
+ B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI,
+ AI);
+
+ return SupportsSelectionDAGSP;
}
/// InsertStackProtectors - Insert code into the prologue and epilogue of the
@@ -312,72 +349,102 @@ static void CreatePrologue(Function *F, Module *M, ReturnInst *RI,
/// - The epilogue checks the value stored in the prologue against the original
/// value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
- BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
- BasicBlock *FailBBDom = 0; // FailBB's dominator.
- AllocaInst *AI = 0; // Place on stack that stores the stack guard.
- Value *StackGuardVar = 0; // The stack guard variable.
+ bool HasPrologue = false;
+ bool SupportsSelectionDAGSP =
+ EnableSelectionDAGSP && !TM->Options.EnableFastISel;
+ AllocaInst *AI = 0; // Place on stack that stores the stack guard.
+ Value *StackGuardVar = 0; // The stack guard variable.
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+ for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
BasicBlock *BB = I++;
ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
- if (!RI) continue;
+ if (!RI)
+ continue;
- if (!FailBB) {
- CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar);
- // Create the basic block to jump to when the guard check fails.
- FailBB = CreateFailBB();
+ if (!HasPrologue) {
+ HasPrologue = true;
+ SupportsSelectionDAGSP &=
+ CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar);
}
- // For each block with a return instruction, convert this:
- //
- // return:
- // ...
- // ret ...
- //
- // into this:
- //
- // return:
- // ...
- // %1 = load __stack_chk_guard
- // %2 = load StackGuardSlot
- // %3 = cmp i1 %1, %2
- // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
- //
- // SP_return:
- // ret ...
- //
- // CallStackCheckFailBlk:
- // call void @__stack_chk_fail()
- // unreachable
-
- // Split the basic block before the return instruction.
- BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+ if (SupportsSelectionDAGSP) {
+ // Since we have a potential tail call, insert the special stack check
+ // intrinsic.
+ Instruction *InsertionPt = 0;
+ if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) {
+ InsertionPt = CI;
+ } else {
+ InsertionPt = RI;
+ // At this point we know that BB has a return statement so it *DOES*
+ // have a terminator.
+ assert(InsertionPt != 0 && "BB must have a terminator instruction at "
+ "this point.");
+ }
- if (DT && DT->isReachableFromEntry(BB)) {
- DT->addNewBlock(NewBB, BB);
- FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB;
- }
+ Function *Intrinsic =
+ Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck);
+ CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt);
+
+ } else {
+ // If we do not support SelectionDAG based tail calls, generate IR level
+ // tail calls.
+ //
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = load __stack_chk_guard
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+      // Create the FailBB. We duplicate the BB every time since the MI tail
+      // merge pass will merge all of the various BBs into one, including the
+      // fail BB generated by the stack protector pseudo instruction.
+ BasicBlock *FailBB = CreateFailBB();
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+
+ // Update the dominator tree if we need to.
+ if (DT && DT->isReachableFromEntry(BB)) {
+ DT->addNewBlock(NewBB, BB);
+ DT->addNewBlock(FailBB, BB);
+ }
- // Remove default branch instruction to the new BB.
- BB->getTerminator()->eraseFromParent();
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
- // Move the newly created basic block to the point right after the old basic
- // block so that it's in the "fall through" position.
- NewBB->moveAfter(BB);
+ // Move the newly created basic block to the point right after the old
+ // basic block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
- // Generate the stack protector instructions in the old basic block.
- LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
- LoadInst *LI2 = new LoadInst(AI, "", true, BB);
- ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
- BranchInst::Create(NewBB, FailBB, Cmp, BB);
+ // Generate the stack protector instructions in the old basic block.
+ IRBuilder<> B(BB);
+ LoadInst *LI1 = B.CreateLoad(StackGuardVar);
+ LoadInst *LI2 = B.CreateLoad(AI);
+ Value *Cmp = B.CreateICmpEQ(LI1, LI2);
+ B.CreateCondBr(Cmp, NewBB, FailBB);
+ }
}
// Return if we didn't modify any basic blocks. I.e., there are no return
// statements in the function.
- if (!FailBB) return false;
-
- if (DT && FailBBDom)
- DT->addNewBlock(FailBB, FailBBDom);
+ if (!HasPrologue)
+ return false;
return true;
}
@@ -387,29 +454,18 @@ bool StackProtector::InsertStackProtectors() {
BasicBlock *StackProtector::CreateFailBB() {
LLVMContext &Context = F->getContext();
BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
+ IRBuilder<> B(FailBB);
if (Trip.getOS() == llvm::Triple::OpenBSD) {
Constant *StackChkFail = M->getOrInsertFunction(
"__stack_smash_handler", Type::getVoidTy(Context),
Type::getInt8PtrTy(Context), NULL);
- Constant *NameStr = ConstantDataArray::getString(Context, F->getName());
- Constant *FuncName =
- new GlobalVariable(*M, NameStr->getType(), true,
- GlobalVariable::PrivateLinkage, NameStr, "SSH");
-
- SmallVector<Constant *, 2> IdxList;
- IdxList.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0));
- IdxList.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0));
-
- SmallVector<Value *, 1> Args;
- Args.push_back(ConstantExpr::getGetElementPtr(FuncName, IdxList));
-
- CallInst::Create(StackChkFail, Args, "", FailBB);
+ B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
Constant *StackChkFail = M->getOrInsertFunction(
"__stack_chk_fail", Type::getVoidTy(Context), NULL);
- CallInst::Create(StackChkFail, "", FailBB);
+ B.CreateCall(StackChkFail);
}
- new UnreachableInst(Context, FailBB);
+ B.CreateUnreachable();
return FailBB;
}
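
The IR-level fallback path above reduces, per returning block, to four IRBuilder calls once the block's old terminator has been erased. A condensed, self-contained sketch against the 3.4-era IRBuilder API follows; the helper and its parameter names are illustrative, not part of the pass.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Append the guard comparison to BB (whose terminator has already been
// removed, as in InsertStackProtectors above) and branch to the split-off
// return block or the failure block.
static void emitGuardCheck(llvm::BasicBlock *BB, llvm::BasicBlock *RetBB,
                           llvm::BasicBlock *FailBB, llvm::Value *StackGuardVar,
                           llvm::AllocaInst *StackGuardSlot) {
  llvm::IRBuilder<> Builder(BB);
  llvm::Value *Guard = Builder.CreateLoad(StackGuardVar);  // current __stack_chk_guard
  llvm::Value *Saved = Builder.CreateLoad(StackGuardSlot); // copy stored by the prologue
  llvm::Value *Match = Builder.CreateICmpEQ(Guard, Saved); // equal means the guard is intact
  Builder.CreateCondBr(Match, RetBB, FailBB);              // continue to ret or call the handler
}
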
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
deleted file mode 100644
index b337c53..0000000
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ /dev/null
@@ -1,825 +0,0 @@
-//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass eliminates PHI instructions by aggressively coalescing the copies
-// that would be inserted by a naive algorithm and only inserting the copies
-// that are necessary. The coalescing technique initially assumes that all
-// registers appearing in a PHI instruction do not interfere. It then eliminates
-// proven interferences, using dominators to only perform a linear number of
-// interference tests instead of the quadratic number of interference tests
-// that this would naively require. This is a technique derived from:
-//
-// Budimlic, et al. Fast copy coalescing and live-range identification.
-// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
-// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
-// PLDI '02. ACM, New York, NY, 25-32.
-//
-// The original implementation constructs a data structure they call a dominance
-// forest for this purpose. The dominance forest was shown to be unnecessary,
-// as it is possible to emulate the creation and traversal of a dominance forest
-// by directly using the dominator tree, rather than actually constructing the
-// dominance forest. This technique is explained in:
-//
-// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
-// Quality and Efficiency,
-// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
-// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
-// CGO '09. IEEE, Washington, DC, 114-125.
-//
-// Careful implementation allows for all of the dominator forest interference
-// checks to be performed at once in a single depth-first traversal of the
-// dominator tree, which is what is implemented here.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "strongphielim"
-#include "llvm/CodeGen/Passes.h"
-#include "PHIEliminationUtils.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-using namespace llvm;
-
-namespace {
- class StrongPHIElimination : public MachineFunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid
- StrongPHIElimination() : MachineFunctionPass(ID) {
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage&) const;
- bool runOnMachineFunction(MachineFunction&);
-
- private:
- /// This struct represents a single node in the union-find data structure
- /// representing the variable congruence classes. There is one difference
- /// from a normal union-find data structure. We steal two bits from the parent
- /// pointer . One of these bits is used to represent whether the register
- /// itself has been isolated, and the other is used to represent whether the
- /// PHI with that register as its destination has been isolated.
- ///
- /// Note that this leads to the strange situation where the leader of a
- /// congruence class may no longer logically be a member, due to being
- /// isolated.
- struct Node {
- enum Flags {
- kRegisterIsolatedFlag = 1,
- kPHIIsolatedFlag = 2
- };
- Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
-
- Node *getLeader();
-
- PointerIntPair<Node*, 2> parent;
- unsigned value;
- unsigned rank;
- };
-
- /// Add a register in a new congruence class containing only itself.
- void addReg(unsigned);
-
- /// Join the congruence classes of two registers. This function is biased
- /// towards the left argument, i.e. after
- ///
- /// addReg(r2);
- /// unionRegs(r1, r2);
- ///
- /// the leader of the unioned congruence class is the same as the leader of
- /// r1's congruence class prior to the union. This is actually relied upon
- /// in the copy insertion code.
- void unionRegs(unsigned, unsigned);
-
- /// Get the color of a register. The color is 0 if the register has been
- /// isolated.
- unsigned getRegColor(unsigned);
-
- // Isolate a register.
- void isolateReg(unsigned);
-
- /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
- /// isolated. Otherwise, it is the original color of its destination and
- /// all of its operands (before they were isolated, if they were).
- unsigned getPHIColor(MachineInstr*);
-
- /// Isolate a PHI.
- void isolatePHI(MachineInstr*);
-
- /// Traverses a basic block, splitting any interferences found between
- /// registers in the same congruence class. It takes two DenseMaps as
- /// arguments that it also updates: CurrentDominatingParent, which maps
- /// a color to the register in that congruence class whose definition was
- /// most recently seen, and ImmediateDominatingParent, which maps a register
- /// to the register in the same congruence class that most immediately
- /// dominates it.
- ///
- /// This function assumes that it is being called in a depth-first traversal
- /// of the dominator tree.
- void SplitInterferencesForBasicBlock(
- MachineBasicBlock&,
- DenseMap<unsigned, unsigned> &CurrentDominatingParent,
- DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
-
- // Lowers a PHI instruction, inserting copies of the source and destination
- // registers as necessary.
- void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
-
- // Merges the live interval of Reg into NewReg and renames Reg to NewReg
- // everywhere that Reg appears. Requires Reg and NewReg to have non-
- // overlapping lifetimes.
- void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
-
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
- MachineDominatorTree *DT;
- LiveIntervals *LI;
-
- BumpPtrAllocator Allocator;
-
- DenseMap<unsigned, Node*> RegNodeMap;
-
- // Maps a basic block to a list of its defs of registers that appear as PHI
- // sources.
- DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
-
- // Maps a color to a pair of a MachineInstr* and a virtual register, which
- // is the operand of that PHI corresponding to the current basic block.
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
-
- // FIXME: Can these two data structures be combined? Would a std::multimap
- // be any better?
-
- // Stores pairs of predecessor basic blocks and the source registers of
- // inserted copy instructions.
- typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
- SrcCopySet InsertedSrcCopySet;
-
- // Maps pairs of predecessor basic blocks and colors to their defining copy
- // instructions.
- typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
- SrcCopyMap;
- SrcCopyMap InsertedSrcCopyMap;
-
- // Maps inserted destination copy registers to their defining copy
- // instructions.
- typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
- DestCopyMap InsertedDestCopies;
- };
-
- struct MIIndexCompare {
- MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
-
- bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
- return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
- }
-
- LiveIntervals *LI;
- };
-} // namespace
-
-STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
-STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
-STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
-
-char StrongPHIElimination::ID = 0;
-INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
- "Eliminate PHI nodes for register allocation, intelligently", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
- "Eliminate PHI nodes for register allocation, intelligently", false, false)
-
-char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
-
-void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
- // FIXME: This only needs to check from the first terminator, as only the
- // first terminator can use a virtual register.
- for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
- assert (RI != MBB->rend());
- MachineInstr *MI = &*RI;
-
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- MachineOperand &MO = *OI;
- if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
- return &MO;
- }
- }
-}
-
-bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
- MRI = &MF.getRegInfo();
- TII = MF.getTarget().getInstrInfo();
- DT = &getAnalysis<MachineDominatorTree>();
- LI = &getAnalysis<LiveIntervals>();
-
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
- I != E; ++I) {
- for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
- BBI != BBE && BBI->isPHI(); ++BBI) {
- unsigned DestReg = BBI->getOperand(0).getReg();
- addReg(DestReg);
- PHISrcDefs[I].push_back(BBI);
-
- for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
- MachineOperand &SrcMO = BBI->getOperand(i);
- unsigned SrcReg = SrcMO.getReg();
- addReg(SrcReg);
- unionRegs(DestReg, SrcReg);
-
- MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (DefMI)
- PHISrcDefs[DefMI->getParent()].push_back(DefMI);
- }
- }
- }
-
- // Perform a depth-first traversal of the dominator tree, splitting
- // interferences amongst PHI-congruence classes.
- DenseMap<unsigned, unsigned> CurrentDominatingParent;
- DenseMap<unsigned, unsigned> ImmediateDominatingParent;
- for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
- DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
- SplitInterferencesForBasicBlock(*DI->getBlock(),
- CurrentDominatingParent,
- ImmediateDominatingParent);
- }
-
- // Insert copies for all PHI source and destination registers.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
- I != E; ++I) {
- for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
- BBI != BBE && BBI->isPHI(); ++BBI) {
- InsertCopiesForPHI(BBI, I);
- }
- }
-
- // FIXME: Preserve the equivalence classes during copy insertion and use
- // the preversed equivalence classes instead of recomputing them.
- RegNodeMap.clear();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
- I != E; ++I) {
- for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
- BBI != BBE && BBI->isPHI(); ++BBI) {
- unsigned DestReg = BBI->getOperand(0).getReg();
- addReg(DestReg);
-
- for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
- unsigned SrcReg = BBI->getOperand(i).getReg();
- addReg(SrcReg);
- unionRegs(DestReg, SrcReg);
- }
- }
- }
-
- DenseMap<unsigned, unsigned> RegRenamingMap;
- bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
- I != E; ++I) {
- MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
- while (BBI != BBE && BBI->isPHI()) {
- MachineInstr *PHI = BBI;
-
- assert(PHI->getNumOperands() > 0);
-
- unsigned SrcReg = PHI->getOperand(1).getReg();
- unsigned SrcColor = getRegColor(SrcReg);
- unsigned NewReg = RegRenamingMap[SrcColor];
- if (!NewReg) {
- NewReg = SrcReg;
- RegRenamingMap[SrcColor] = SrcReg;
- }
- MergeLIsAndRename(SrcReg, NewReg);
-
- unsigned DestReg = PHI->getOperand(0).getReg();
- if (!InsertedDestCopies.count(DestReg))
- MergeLIsAndRename(DestReg, NewReg);
-
- for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
- unsigned SrcReg = PHI->getOperand(i).getReg();
- MergeLIsAndRename(SrcReg, NewReg);
- }
-
- ++BBI;
- LI->RemoveMachineInstrFromMaps(PHI);
- PHI->eraseFromParent();
- Changed = true;
- }
- }
-
- // Due to the insertion of copies to split live ranges, the live intervals are
- // guaranteed to not overlap, except in one case: an original PHI source and a
- // PHI destination copy. In this case, they have the same value and thus don't
- // truly intersect, so we merge them into the value live at that point.
- // FIXME: Is there some better way we can handle this?
- for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
- E = InsertedDestCopies.end(); I != E; ++I) {
- unsigned DestReg = I->first;
- unsigned DestColor = getRegColor(DestReg);
- unsigned NewReg = RegRenamingMap[DestColor];
-
- LiveInterval &DestLI = LI->getInterval(DestReg);
- LiveInterval &NewLI = LI->getInterval(NewReg);
-
- assert(DestLI.ranges.size() == 1
- && "PHI destination copy's live interval should be a single live "
- "range from the beginning of the BB to the copy instruction.");
- LiveRange *DestLR = DestLI.begin();
- VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
- if (!NewVNI) {
- NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
- MachineInstr *CopyInstr = I->second;
- CopyInstr->getOperand(1).setIsKill(true);
- }
-
- LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
- NewLI.addRange(NewLR);
-
- LI->removeInterval(DestReg);
- MRI->replaceRegWith(DestReg, NewReg);
- }
-
- // Adjust the live intervals of all PHI source registers to handle the case
- // where the PHIs in successor blocks were the only later uses of the source
- // register.
- for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
- E = InsertedSrcCopySet.end(); I != E; ++I) {
- MachineBasicBlock *MBB = I->first;
- unsigned SrcReg = I->second;
- if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
- SrcReg = RenamedRegister;
-
- LiveInterval &SrcLI = LI->getInterval(SrcReg);
-
- bool isLiveOut = false;
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
- isLiveOut = true;
- break;
- }
- }
-
- if (isLiveOut)
- continue;
-
- MachineOperand *LastUse = findLastUse(MBB, SrcReg);
- assert(LastUse);
- SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
- SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
- LastUse->setIsKill(true);
- }
-
- Allocator.Reset();
- RegNodeMap.clear();
- PHISrcDefs.clear();
- InsertedSrcCopySet.clear();
- InsertedSrcCopyMap.clear();
- InsertedDestCopies.clear();
-
- return Changed;
-}
-
-void StrongPHIElimination::addReg(unsigned Reg) {
- Node *&N = RegNodeMap[Reg];
- if (!N)
- N = new (Allocator) Node(Reg);
-}
-
-StrongPHIElimination::Node*
-StrongPHIElimination::Node::getLeader() {
- Node *N = this;
- Node *Parent = parent.getPointer();
- Node *Grandparent = Parent->parent.getPointer();
-
- while (Parent != Grandparent) {
- N->parent.setPointer(Grandparent);
- N = Grandparent;
- Parent = Parent->parent.getPointer();
- Grandparent = Parent->parent.getPointer();
- }
-
- return Parent;
-}
-
-unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
- DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
- if (RI == RegNodeMap.end())
- return 0;
- Node *Node = RI->second;
- if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
- return 0;
- return Node->getLeader()->value;
-}
-
-void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
- Node *Node1 = RegNodeMap[Reg1]->getLeader();
- Node *Node2 = RegNodeMap[Reg2]->getLeader();
-
- if (Node1->rank > Node2->rank) {
- Node2->parent.setPointer(Node1->getLeader());
- } else if (Node1->rank < Node2->rank) {
- Node1->parent.setPointer(Node2->getLeader());
- } else if (Node1 != Node2) {
- Node2->parent.setPointer(Node1->getLeader());
- Node1->rank++;
- }
-}
-
-void StrongPHIElimination::isolateReg(unsigned Reg) {
- Node *Node = RegNodeMap[Reg];
- Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
-}
-
-unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
- assert(PHI->isPHI());
-
- unsigned DestReg = PHI->getOperand(0).getReg();
- Node *DestNode = RegNodeMap[DestReg];
- if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
- return 0;
-
- for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
- unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
- if (SrcColor)
- return SrcColor;
- }
- return 0;
-}
-
-void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
- assert(PHI->isPHI());
- Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
- Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
-}
-
-/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
-/// interferences found between registers in the same congruence class. It
-/// takes two DenseMaps as arguments that it also updates:
-///
-/// 1) CurrentDominatingParent, which maps a color to the register in that
-/// congruence class whose definition was most recently seen.
-///
-/// 2) ImmediateDominatingParent, which maps a register to the register in the
-/// same congruence class that most immediately dominates it.
-///
-/// This function assumes that it is being called in a depth-first traversal
-/// of the dominator tree.
-///
-/// The algorithm used here is a generalization of the dominance-based SSA test
-/// for two variables. If there are variables a_1, ..., a_n such that
-///
-/// def(a_1) dom ... dom def(a_n),
-///
-/// then we can test for an interference between any two a_i by only using O(n)
-/// interference tests between pairs of variables. If i < j and a_i and a_j
-/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
-/// Thus, in order to test for an interference involving a_i, we need only check
-/// for a potential interference with a_i+1.
-///
-/// This method can be generalized to arbitrary sets of variables by performing
-/// a depth-first traversal of the dominator tree. As we traverse down a branch
-/// of the dominator tree, we keep track of the current dominating variable and
-/// only perform an interference test with that variable. However, when we go to
-/// another branch of the dominator tree, the definition of the current dominating
-/// variable may no longer dominate the current block. In order to correct this,
-/// we need to use a stack of past choices of the current dominating variable
-/// and pop from this stack until we find a variable whose definition actually
-/// dominates the current block.
-///
-/// There will be one push on this stack for each variable that has become the
-/// current dominating variable, so instead of using an explicit stack we can
-/// simply associate the previous choice for a current dominating variable with
-/// the new choice. This works better in our implementation, where we test for
-/// interference in multiple distinct sets at once.
-void
-StrongPHIElimination::SplitInterferencesForBasicBlock(
- MachineBasicBlock &MBB,
- DenseMap<unsigned, unsigned> &CurrentDominatingParent,
- DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
- // Sort defs by their order in the original basic block, as the code below
- // assumes that it is processing definitions in dominance order.
- std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
- std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
-
- for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
- BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
- for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
- E = (*BBI)->operands_end(); I != E; ++I) {
- const MachineOperand &MO = *I;
-
- // FIXME: This would be faster if it were possible to bail out of checking
- // an instruction's operands after the explicit defs, but this is incorrect
- // for variadic instructions, which may appear before register allocation
- // in the future.
- if (!MO.isReg() || !MO.isDef())
- continue;
-
- unsigned DestReg = MO.getReg();
- if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
- continue;
-
- // If the virtual register being defined is not used in any PHI or has
- // already been isolated, then there are no more interferences to check.
- unsigned DestColor = getRegColor(DestReg);
- if (!DestColor)
- continue;
-
- // The input to this pass sometimes is not in SSA form in every basic
- // block, as some virtual registers have redefinitions. We could eliminate
- // this by fixing the passes that generate the non-SSA code, or we could
- // handle it here by tracking defining machine instructions rather than
- // virtual registers. For now, we just handle the situation conservatively
- // in a way that will possibly lead to false interferences.
- unsigned &CurrentParent = CurrentDominatingParent[DestColor];
- unsigned NewParent = CurrentParent;
- if (NewParent == DestReg)
- continue;
-
- // Pop registers from the stack represented by ImmediateDominatingParent
- // until we find a parent that dominates the current instruction.
- while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
- || !getRegColor(NewParent)))
- NewParent = ImmediateDominatingParent[NewParent];
-
- // If NewParent is nonzero, then its definition dominates the current
- // instruction, so it is only necessary to check for the liveness of
- // NewParent in order to check for an interference.
- if (NewParent
- && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
- // If there is an interference, always isolate the new register. This
- // could be improved by using a heuristic that decides which of the two
- // registers to isolate.
- isolateReg(DestReg);
- CurrentParent = NewParent;
- } else {
- // If there is no interference, update ImmediateDominatingParent and set
- // the CurrentDominatingParent for this color to the current register.
- ImmediateDominatingParent[DestReg] = NewParent;
- CurrentParent = DestReg;
- }
- }
- }
-
- // We now walk the PHIs in successor blocks and check for interferences. This
- // is necessary because the use of a PHI's operands are logically contained in
- // the predecessor block. The def of a PHI's destination register is processed
- // along with the other defs in a basic block.
-
- CurrentPHIForColor.clear();
-
- for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end(); SI != SE; ++SI) {
- for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
- BBI != BBE && BBI->isPHI(); ++BBI) {
- MachineInstr *PHI = BBI;
-
- // If a PHI is already isolated, either by being isolated directly or
- // having all of its operands isolated, ignore it.
- unsigned Color = getPHIColor(PHI);
- if (!Color)
- continue;
-
- // Find the index of the PHI operand that corresponds to this basic block.
- unsigned PredIndex;
- for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
- if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
- break;
- }
- assert(PredIndex < PHI->getNumOperands());
- unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
-
- // Pop registers from the stack represented by ImmediateDominatingParent
- // until we find a parent that dominates the current instruction.
- unsigned &CurrentParent = CurrentDominatingParent[Color];
- unsigned NewParent = CurrentParent;
- while (NewParent
- && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
- || !getRegColor(NewParent)))
- NewParent = ImmediateDominatingParent[NewParent];
- CurrentParent = NewParent;
-
- // If there is an interference with a register, always isolate the
- // register rather than the PHI. It is also possible to isolate the
- // PHI, but that introduces copies for all of the registers involved
- // in that PHI.
- if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
- && NewParent != PredOperandReg)
- isolateReg(NewParent);
-
- std::pair<MachineInstr*, unsigned>
- &CurrentPHI = CurrentPHIForColor[Color];
-
- // If two PHIs have the same operand from every shared predecessor, then
- // they don't actually interfere. Otherwise, isolate the current PHI. This
- // could possibly be improved, e.g. we could isolate the PHI with the
- // fewest operands.
- if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
- isolatePHI(PHI);
- else
- CurrentPHI = std::make_pair(PHI, PredOperandReg);
- }
- }
-}
-
-void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
- MachineBasicBlock *MBB) {
- assert(PHI->isPHI());
- ++NumPHIsLowered;
- unsigned PHIColor = getPHIColor(PHI);
-
- for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
- MachineOperand &SrcMO = PHI->getOperand(i);
-
- // If a source is defined by an implicit def, there is no need to insert a
- // copy in the predecessor.
- if (SrcMO.isUndef())
- continue;
-
- unsigned SrcReg = SrcMO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- "Machine PHI Operands must all be virtual registers!");
-
- MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
- unsigned SrcColor = getRegColor(SrcReg);
-
- // If neither the PHI nor the operand were isolated, then we only need to
- // set the phi-kill flag on the VNInfo at this PHI.
- if (PHIColor && SrcColor == PHIColor) {
- LiveInterval &SrcInterval = LI->getInterval(SrcReg);
- SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
- VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
- (void)SrcVNI;
- assert(SrcVNI);
- continue;
- }
-
- unsigned CopyReg = 0;
- if (PHIColor) {
- SrcCopyMap::const_iterator I
- = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
- CopyReg
- = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
- }
-
- if (!CopyReg) {
- const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
- CopyReg = MRI->createVirtualRegister(RC);
-
- MachineBasicBlock::iterator
- CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
- unsigned SrcSubReg = SrcMO.getSubReg();
- MachineInstr *CopyInstr = BuildMI(*PredBB,
- CopyInsertPoint,
- PHI->getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- CopyReg).addReg(SrcReg, 0, SrcSubReg);
- LI->InsertMachineInstrInMaps(CopyInstr);
- ++NumSrcCopiesInserted;
-
- // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
- // the newly added range.
- LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
- InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
-
- addReg(CopyReg);
- if (PHIColor) {
- unionRegs(PHIColor, CopyReg);
- assert(getRegColor(CopyReg) != CopyReg);
- } else {
- PHIColor = CopyReg;
- assert(getRegColor(CopyReg) == CopyReg);
- }
-
- // Insert into map if not already there.
- InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor),
- CopyInstr));
- }
-
- SrcMO.setReg(CopyReg);
-
- // If SrcReg is not live beyond the PHI, trim its interval so that it is no
- // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
- // processed later, but this is still correct to do at this point because we
- // never rely on LiveIntervals being correct while inserting copies.
- // FIXME: Should this just count uses at PHIs like the normal PHIElimination
- // pass does?
- LiveInterval &SrcLI = LI->getInterval(SrcReg);
- SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
- SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
- if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
- SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
- }
-
- unsigned DestReg = PHI->getOperand(0).getReg();
- unsigned DestColor = getRegColor(DestReg);
-
- if (PHIColor && DestColor == PHIColor) {
- LiveInterval &DestLI = LI->getInterval(DestReg);
-
- // Set the phi-def flag for the VN at this PHI.
- SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
- assert(DestVNI);
-
- // Prior to PHI elimination, the live ranges of PHIs begin at their defining
- // instruction. After PHI elimination, PHI instructions are replaced by VNs
- // with the phi-def flag set, and the live ranges of these VNs start at the
- // beginning of the basic block.
- SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
- DestVNI->def = MBBStartIndex;
- DestLI.addRange(LiveRange(MBBStartIndex,
- PHIIndex.getRegSlot(),
- DestVNI));
- return;
- }
-
- const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
- unsigned CopyReg = MRI->createVirtualRegister(RC);
-
- MachineInstr *CopyInstr = BuildMI(*MBB,
- MBB->SkipPHIsAndLabels(MBB->begin()),
- PHI->getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- DestReg).addReg(CopyReg);
- LI->InsertMachineInstrInMaps(CopyInstr);
- PHI->getOperand(0).setReg(CopyReg);
- ++NumDestCopiesInserted;
-
- // Add the region from the beginning of MBB to the copy instruction to
- // CopyReg's live interval, and give the VNInfo the phidef flag.
- LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
- SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
- SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
- VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
- LI->getVNInfoAllocator());
- CopyLI.addRange(LiveRange(MBBStartIndex,
- DestCopyIndex.getRegSlot(),
- CopyVNI));
-
- // Adjust DestReg's live interval to adjust for its new definition at
- // CopyInstr.
- LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
- SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot());
-
- VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
- assert(DestVNI);
- DestVNI->def = DestCopyIndex.getRegSlot();
-
- InsertedDestCopies[CopyReg] = CopyInstr;
-}
-
-void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
- if (Reg == NewReg)
- return;
-
- LiveInterval &OldLI = LI->getInterval(Reg);
- LiveInterval &NewLI = LI->getInterval(NewReg);
-
- // Merge the live ranges of the two registers.
- DenseMap<VNInfo*, VNInfo*> VNMap;
- for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
- LRI != LRE; ++LRI) {
- LiveRange OldLR = *LRI;
- VNInfo *OldVN = OldLR.valno;
-
- VNInfo *&NewVN = VNMap[OldVN];
- if (!NewVN) {
- NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
- VNMap[OldVN] = NewVN;
- }
-
- LiveRange LR(OldLR.start, OldLR.end, NewVN);
- NewLI.addRange(LR);
- }
-
- // Remove the LiveInterval for the register being renamed and replace all
- // of its defs and uses with the new register.
- LI->removeInterval(Reg);
- MRI->replaceRegWith(Reg, NewReg);
-}
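
The pass removed above leaned on a small union-find over virtual registers (path compression in Node::getLeader, union by rank in unionRegs) to build PHI congruence classes. A simplified, standalone sketch of that structure is shown here; the isolation flag bits kept in the PointerIntPair are deliberately omitted, and the names are illustrative.

// Minimal union-find in the spirit of the removed Node machinery.
struct UFNode {
  UFNode *Parent;
  unsigned Rank;
  UFNode() : Parent(this), Rank(0) {}
};

static UFNode *findLeader(UFNode *N) {
  while (N->Parent != N) {
    N->Parent = N->Parent->Parent; // path halving keeps chains short
    N = N->Parent;
  }
  return N;
}

static void unionNodes(UFNode *A, UFNode *B) {
  A = findLeader(A);
  B = findLeader(B);
  if (A == B)
    return;
  if (A->Rank >= B->Rank) {
    // Ties keep A as the leader, matching the left bias the copy-insertion
    // code in the removed pass relied on.
    B->Parent = A;
    if (A->Rank == B->Rank)
      ++A->Rank;
  } else {
    A->Parent = B;
  }
}
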
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 8a1d567..ff0181e 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -638,8 +638,6 @@ bothUsedInPHI(const MachineBasicBlock &A,
bool
TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
- SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
-
for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
PE = BB.pred_end(); PI != PE; ++PI) {
MachineBasicBlock *PredBB = *PI;
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index bb8bd42..bf4fd65 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
@@ -276,6 +277,36 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
return false;
}
+bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
+ unsigned SubIdx, unsigned &Size,
+ unsigned &Offset,
+ const TargetMachine *TM) const {
+ if (!SubIdx) {
+ Size = RC->getSize();
+ Offset = 0;
+ return true;
+ }
+ unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx);
+ // Convert bit size to byte size to be consistent with
+ // MCRegisterClass::getSize().
+ if (BitSize % 8)
+ return false;
+
+ int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
+ if (BitOffset < 0 || BitOffset % 8)
+ return false;
+
+ Size = BitSize /= 8;
+ Offset = (unsigned)BitOffset / 8;
+
+ assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
+
+ if (!TM->getDataLayout()->isLittleEndian()) {
+ Offset = RC->getSize() - (Offset + Size);
+ }
+ return true;
+}
+
void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
@@ -364,6 +395,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
// Ask the target to do the actual folding.
if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+ NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
NewMI->mayStore()) &&
@@ -424,9 +456,19 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
NewMI = MBB.insert(MI, NewMI);
// Copy the memoperands from the load to the folded instruction.
- NewMI->setMemRefs(LoadMI->memoperands_begin(),
- LoadMI->memoperands_end());
-
+ if (MI->memoperands_empty()) {
+ NewMI->setMemRefs(LoadMI->memoperands_begin(),
+ LoadMI->memoperands_end());
+ }
+ else {
+ // Handle the rare case of folding multiple loads.
+ NewMI->setMemRefs(MI->memoperands_begin(),
+ MI->memoperands_end());
+ for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(),
+ E = LoadMI->memoperands_end(); I != E; ++I) {
+ NewMI->addMemOperand(MF, *I);
+ }
+ }
return NewMI;
}
@@ -630,6 +672,10 @@ unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
return 1;
}
+unsigned TargetInstrInfo::getPredicationCost(const MachineInstr *) const {
+ return 0;
+}
+
unsigned TargetInstrInfo::
getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
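
The new getStackSlotRange hunk above boils down to three steps: reject sub-byte subregister slices, convert bit geometry to bytes, and mirror the offset within the spill slot on big-endian targets. A standalone sketch of just that arithmetic, with plain integers standing in for the TargetRegisterInfo and DataLayout queries (the helper name is illustrative):

// Compute the byte range a subregister occupies within a spilled register
// class of RegClassBytes bytes. Returns false when the slice is not
// byte-aligned or escapes the slot.
static bool subRegByteRange(unsigned RegClassBytes, unsigned SubRegBits,
                            int SubRegBitOffset, bool LittleEndian,
                            unsigned &Size, unsigned &Offset) {
  if (SubRegBits % 8 != 0 || SubRegBitOffset < 0 || SubRegBitOffset % 8 != 0)
    return false;                                 // not a whole-byte slice
  Size = SubRegBits / 8;
  Offset = static_cast<unsigned>(SubRegBitOffset) / 8;
  if (Offset + Size > RegClassBytes)
    return false;                                 // slice escapes the slot
  if (!LittleEndian)
    Offset = RegClassBytes - (Offset + Size);     // count from the high end
  return true;
}

For example, a 32-bit subregister at bit offset 32 of an 8-byte register class maps to bytes [4, 8) on a little-endian target and [0, 4) on a big-endian one.
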
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 8d8f81b..30305af 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -191,6 +191,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::ROUND_F32] = "roundf";
+ Names[RTLIB::ROUND_F64] = "round";
+ Names[RTLIB::ROUND_F80] = "roundl";
+ Names[RTLIB::ROUND_F128] = "roundl";
+ Names[RTLIB::ROUND_PPCF128] = "roundl";
Names[RTLIB::FLOOR_F32] = "floorf";
Names[RTLIB::FLOOR_F64] = "floor";
Names[RTLIB::FLOOR_F80] = "floorl";
@@ -313,34 +318,62 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16";
Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16";
Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16";
Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16";
Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16";
Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16";
Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16";
Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
Names[RTLIB::SINCOS_F32] = "sincosf";
@@ -356,6 +389,13 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::SINCOS_F128] = 0;
Names[RTLIB::SINCOS_PPCF128] = 0;
}
+
+ if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) {
+ Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
+ } else {
+ // These are generally not available.
+ Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = 0;
+ }
}
/// InitLibcallCallingConvs - Set default libcall CallingConvs.
@@ -624,7 +664,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
// Perform these initializations only once.
IsLittleEndian = TD->isLittleEndian();
- PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
= MaxStoresPerMemmoveOptSize = 4;
@@ -682,6 +721,14 @@ void TargetLoweringBase::initActions() {
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand);
+
+ // These operations default to expand for vector types.
+ if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
+ VT <= MVT::LAST_VECTOR_VALUETYPE)
+ setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -747,6 +794,19 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
}
+MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
+ return MVT::getIntegerVT(getPointerSizeInBits(AS));
+}
+
+unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
+ return TD->getPointerSizeInBits(AS);
+}
+
+unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
+ assert(Ty->isPointerTy());
+ return getPointerSizeInBits(Ty->getPointerAddressSpace());
+}
+
MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
return MVT::getIntegerVT(8*TD->getPointerSize(0));
}
@@ -1162,7 +1222,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0));
}
}
@@ -1228,6 +1288,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case PtrToInt: return ISD::BITCAST;
case IntToPtr: return ISD::BITCAST;
case BitCast: return ISD::BITCAST;
+ case AddrSpaceCast: return ISD::ADDRSPACECAST;
case ICmp: return ISD::SETCC;
case FCmp: return ISD::SETCC;
case PHI: return 0;
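
For orientation, a sketch of what the new RTLIB entries above correspond to at the source level (illustrative only, not something the patch itself adds): when a target cannot lower an atomic read-modify-write inline, the legalizer emits a call to one of these named runtime helpers, the same helpers the GCC-style __sync_* builtins resolve to.

    #include <cstdio>

    int main() {
      long counter = 0;
      // Atomically add 5 and return the previous value. On a target that cannot
      // expand this inline, codegen falls back to a libcall with one of the names
      // registered above, e.g. __sync_fetch_and_add_8 for an 8-byte operand.
      long old = __sync_fetch_and_add(&counter, 5);
      std::printf("old=%ld new=%ld\n", old, counter);
      return 0;
    }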
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 07cf871..59d7b57 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -52,10 +52,10 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
default:
report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
- return Mang->getSymbol(GV);
+ return getSymbol(*Mang, GV);
case dwarf::DW_EH_PE_pcrel: {
return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
- Mang->getSymbol(GV)->getName());
+ getSymbol(*Mang, GV)->getName());
}
}
}
@@ -104,7 +104,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
- MCSymbol *Sym = Mang->getSymbol(GV);
+ MCSymbol *Sym = getSymbol(*Mang, GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -252,7 +252,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Prefix = getSectionPrefixForGlobal(Kind);
SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
- MCSymbol *Sym = Mang->getSymbol(GV);
+ MCSymbol *Sym = getSymbol(*Mang, GV);
Name.append(Sym->getName().begin(), Sym->getName().end());
StringRef Group = "";
unsigned Flags = getELFSectionFlags(Kind);
@@ -523,6 +523,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
if (Kind.isText())
return GV->isWeakForLinker() ? TextCoalSection : TextSection;
@@ -575,10 +580,6 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isBSSLocal())
return DataBSSSection;
- // Handle thread local data.
- if (Kind.isThreadBSS()) return TLSBSSSection;
- if (Kind.isThreadData()) return TLSDataSection;
-
// Otherwise, just drop the variable in the normal data section.
return DataSection;
}
@@ -613,7 +614,7 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
// FIXME: ObjC metadata is currently emitted as internal symbols that have
// \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
// this horrible hack can go away.
- MCSymbol *Sym = Mang->getSymbol(GV);
+ MCSymbol *Sym = getSymbol(*Mang, GV);
if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
return false;
}
@@ -642,7 +643,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
MachOMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
- MCSymbol *Sym = Mang->getSymbol(GV);
+ MCSymbol *Sym = getSymbol(*Mang, GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -671,7 +672,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
- MCSymbol *Sym = Mang->getSymbol(GV);
+ MCSymbol *Sym = getSymbol(*Mang, GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -732,6 +733,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
return getContext().getCOFFSection(Name,
Characteristics,
Kind,
+ "",
Selection);
}
@@ -767,16 +769,22 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
return getContext().getCOFFSection(Name.str(), Characteristics,
- Kind, COFF::IMAGE_COMDAT_SELECT_ANY);
+ Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY);
}
if (Kind.isText())
- return getTextSection();
+ return TextSection;
if (Kind.isThreadLocal())
- return getTLSDataSection();
+ return TLSDataSection;
- return getDataSection();
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ if (Kind.isBSS())
+ return BSSSection;
+
+ return DataSection;
}
void TargetLoweringObjectFileCOFF::
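
As a rough illustration of what the COFF SelectSectionForGlobal hunk above decides: globals are bucketed by SectionKind, which at the source level corresponds roughly to the mapping below (the section names are typical, not guaranteed, and vary by target and toolchain).

    #include <cstdio>

    const int Table[4] = {1, 2, 3, 4};   // read-only data   -> .rdata / .rodata
    int Counter = 42;                    // initialized data -> .data
    int Scratch[256];                    // zero-initialized -> .bss
    thread_local int TlsSlot;            // thread-local     -> TLS data/BSS section
    int get(int i) { return Table[i]; }  // code             -> .text

    int main() {
      Scratch[0] = get(1) + Counter + TlsSlot;
      std::printf("%d\n", Scratch[0]);
      return 0;
    }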
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 7a39a4c..f7bf86b 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -22,10 +22,8 @@ using namespace llvm;
bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
// Check to see if we should eliminate non-leaf frame pointers and then
// check to see if we should eliminate all frame pointers.
- bool NoFramePointerElimNonLeaf =
- MF.getFunction()->getFnAttribute("no-frame-pointer-elim-non-leaf")
- .getValueAsString() == "true";
- if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf") &&
+ !NoFramePointerElim) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->hasCalls();
}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index ffcee1f..5a15243 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -73,6 +73,14 @@ void PrintRegUnit::print(raw_ostream &OS) const {
OS << '~' << TRI->getName(*Roots);
}
+void PrintVRegOrUnit::print(raw_ostream &OS) const {
+ if (TRI && TRI->isVirtualRegister(Unit)) {
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
+ return;
+ }
+ PrintRegUnit::print(OS);
+}
+
/// getAllocatableClass - Return the maximal subclass of the given register
/// class that is allocatable, or NULL.
const TargetRegisterClass *
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 64ee9d1..b0f2ca6 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -210,7 +210,8 @@ unsigned TargetSchedModel::computeOperandLatency(
// unit latency (defaultDefLatency may be too conservative).
#ifndef NDEBUG
if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
- && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) {
+ && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
+ && SchedModel.isComplete()) {
std::string Err;
raw_string_ostream ss(Err);
ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
@@ -224,10 +225,13 @@ unsigned TargetSchedModel::computeOperandLatency(
return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
}
-unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
+unsigned
+TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
+ bool UseDefaultDefLatency) const {
// For the itinerary model, fall back to the old subtarget hook.
// Allow subtargets to compute Bundle latencies outside the machine model.
- if (hasInstrItineraries() || MI->isBundle())
+ if (hasInstrItineraries() || MI->isBundle() ||
+ (!hasInstrSchedModel() && !UseDefaultDefLatency))
return TII->getInstrLatency(&InstrItins, MI);
if (hasInstrSchedModel()) {
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index c52e675..b9a6b47 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1400,7 +1400,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
SlotIndex endIdx =
LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber);
- LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI));
+ LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI));
}
}
@@ -1457,7 +1457,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
if (I->end == UseIdx)
- LI.removeRange(LastCopyIdx, UseIdx);
+ LI.removeSegment(LastCopyIdx, UseIdx);
}
} else if (RemovedKillFlag) {
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index a95ebcd..f735ef2 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -50,7 +49,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<ProfileInfo>();
}
};
}
@@ -87,9 +85,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
}
// Actually remove the blocks now.
- ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
- if (PI) PI->removeBlock(DeadBlocks[i]);
DeadBlocks[i]->eraseFromParent();
}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index cd012d2..e0aa405 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -264,15 +265,36 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
+ SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
+ bool IsExitBB = MBBI->succ_empty();
for (MachineBasicBlock::instr_iterator
MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
MachineInstr *MI = MII;
++MII;
+ // Check if this instruction is a call to a noreturn function.
+ // If so, all the definitions set by this instruction can be ignored.
+ if (IsExitBB && MI->isCall())
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (!MO.isGlobal())
+ continue;
+ const Function *Func = dyn_cast<Function>(MO.getGlobal());
+ if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
+ // We need to keep correct unwind information
+ // even if the function will not return, since the
+ // runtime may need it.
+ !Func->hasFnAttribute(Attribute::NoUnwind))
+ continue;
+ NoReturnInsts.insert(MI);
+ break;
+ }
+
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
@@ -353,7 +375,25 @@ void VirtRegRewriter::rewrite() {
}
// Tell MRI about physical registers in use.
- for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
- if (!MRI->reg_nodbg_empty(Reg))
- MRI->setPhysRegUsed(Reg);
+ if (NoReturnInsts.empty()) {
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+ if (!MRI->reg_nodbg_empty(Reg))
+ MRI->setPhysRegUsed(Reg);
+ } else {
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) {
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ // Check if this register has a use that will impact the rest of the
+ // code. Uses in debug and noreturn instructions do not impact the
+ // generated code.
+ for (MachineRegisterInfo::reg_nodbg_iterator It =
+ MRI->reg_nodbg_begin(Reg),
+ EndIt = MRI->reg_nodbg_end(); It != EndIt; ++It) {
+ if (!NoReturnInsts.count(&(*It))) {
+ MRI->setPhysRegUsed(Reg);
+ break;
+ }
+ }
+ }
+ }
}
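
A small source-level sketch of the situation the VirtRegMap change above starts to exploit (the names below are invented for illustration): a function marked both noreturn and noexcept lowers to a call carrying the NoReturn and NoUnwind attributes checked in the hunk, so physical registers defined only by such a call in an exit block no longer need to be recorded as used.

    #include <cstdlib>

    // Hypothetical helper: [[noreturn]] plus noexcept is the source-level analogue
    // of the NoReturn + NoUnwind attribute pair the rewriter looks for.
    [[noreturn]] void fail_fast() noexcept { std::abort(); }

    int demo(bool broken) {
      if (broken)
        fail_fast();   // exit block: registers this call defines are dead afterwards
      return 0;
    }

    int main() { return demo(false); }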
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
index 4a6221d..61a3fb0 100644
--- a/lib/DebugInfo/CMakeLists.txt
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -12,4 +12,6 @@ add_llvm_library(LLVMDebugInfo
DWARFDebugLoc.cpp
DWARFDebugRangeList.cpp
DWARFFormValue.cpp
+ DWARFTypeUnit.cpp
+ DWARFUnit.cpp
)
diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
index 2de62ab..f46fd58 100644
--- a/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
@@ -14,37 +14,51 @@
using namespace llvm;
using namespace dwarf;
-bool
-DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr){
- return extract(data, offset_ptr, data.getULEB128(offset_ptr));
+void DWARFAbbreviationDeclaration::clear() {
+ Code = 0;
+ Tag = 0;
+ HasChildren = false;
+ Attributes.clear();
}
-bool
-DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr,
- uint32_t code) {
- Code = code;
- Attribute.clear();
- if (Code) {
- Tag = data.getULEB128(offset_ptr);
- HasChildren = data.getU8(offset_ptr);
+DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
+ clear();
+}
- while (data.isValidOffset(*offset_ptr)) {
- uint16_t attr = data.getULEB128(offset_ptr);
- uint16_t form = data.getULEB128(offset_ptr);
+bool
+DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint32_t* OffsetPtr) {
+ clear();
+ Code = Data.getULEB128(OffsetPtr);
+ if (Code == 0) {
+ return false;
+ }
+ Tag = Data.getULEB128(OffsetPtr);
+ uint8_t ChildrenByte = Data.getU8(OffsetPtr);
+ HasChildren = (ChildrenByte == DW_CHILDREN_yes);
- if (attr && form)
- Attribute.push_back(DWARFAttribute(attr, form));
- else
- break;
+ while (true) {
+ uint32_t CurOffset = *OffsetPtr;
+ uint16_t Attr = Data.getULEB128(OffsetPtr);
+ if (CurOffset == *OffsetPtr) {
+ clear();
+ return false;
}
-
- return Tag != 0;
- } else {
- Tag = 0;
- HasChildren = false;
+ CurOffset = *OffsetPtr;
+ uint16_t Form = Data.getULEB128(OffsetPtr);
+ if (CurOffset == *OffsetPtr) {
+ clear();
+ return false;
+ }
+ if (Attr == 0 && Form == 0)
+ break;
+ Attributes.push_back(AttributeSpec(Attr, Form));
}
- return false;
+ if (Tag == 0) {
+ clear();
+ return false;
+ }
+ return true;
}
void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
@@ -55,19 +69,19 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
else
OS << format("DW_TAG_Unknown_%x", getTag());
OS << "\tDW_CHILDREN_" << (hasChildren() ? "yes" : "no") << '\n';
- for (unsigned i = 0, e = Attribute.size(); i != e; ++i) {
+ for (unsigned i = 0, e = Attributes.size(); i != e; ++i) {
OS << '\t';
- const char *attrString = AttributeString(Attribute[i].getAttribute());
+ const char *attrString = AttributeString(Attributes[i].Attr);
if (attrString)
OS << attrString;
else
- OS << format("DW_AT_Unknown_%x", Attribute[i].getAttribute());
+ OS << format("DW_AT_Unknown_%x", Attributes[i].Attr);
OS << '\t';
- const char *formString = FormEncodingString(Attribute[i].getForm());
+ const char *formString = FormEncodingString(Attributes[i].Form);
if (formString)
OS << formString;
else
- OS << format("DW_FORM_Unknown_%x", Attribute[i].getForm());
+ OS << format("DW_FORM_Unknown_%x", Attributes[i].Form);
OS << '\n';
}
OS << '\n';
@@ -75,8 +89,8 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
uint32_t
DWARFAbbreviationDeclaration::findAttributeIndex(uint16_t attr) const {
- for (uint32_t i = 0, e = Attribute.size(); i != e; ++i) {
- if (Attribute[i].getAttribute() == attr)
+ for (uint32_t i = 0, e = Attributes.size(); i != e; ++i) {
+ if (Attributes[i].Attr == attr)
return i;
}
return -1U;
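
For reference, a self-contained sketch of the ULEB128 encoding that Data.getULEB128() consumes in the rewritten extract() above; the test vector is the worked example from the DWARF specification.

    #include <cstdint>
    #include <cstdio>

    // Decode one unsigned LEB128 value; each byte contributes 7 payload bits,
    // and the high bit marks that another byte follows.
    static uint64_t decodeULEB128(const uint8_t *p, unsigned *Length) {
      uint64_t Value = 0;
      unsigned Shift = 0, i = 0;
      uint8_t Byte;
      do {
        Byte = p[i++];
        Value |= uint64_t(Byte & 0x7f) << Shift;
        Shift += 7;
      } while (Byte & 0x80);
      if (Length)
        *Length = i;
      return Value;
    }

    int main() {
      const uint8_t Enc[] = {0xe5, 0x8e, 0x26}; // encodes 624485
      unsigned Len = 0;
      uint64_t V = decodeULEB128(Enc, &Len);
      std::printf("%llu (%u bytes)\n", (unsigned long long)V, Len);
      return 0;
    }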
diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/lib/DebugInfo/DWARFAbbreviationDeclaration.h
index 9a3fcd8..e9b072e 100644
--- a/lib/DebugInfo/DWARFAbbreviationDeclaration.h
+++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.h
@@ -10,7 +10,6 @@
#ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
#define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
-#include "DWARFAttribute.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataExtractor.h"
@@ -22,31 +21,33 @@ class DWARFAbbreviationDeclaration {
uint32_t Code;
uint32_t Tag;
bool HasChildren;
- SmallVector<DWARFAttribute, 8> Attribute;
+
+ struct AttributeSpec {
+ AttributeSpec(uint16_t Attr, uint16_t Form) : Attr(Attr), Form(Form) {}
+ uint16_t Attr;
+ uint16_t Form;
+ };
+ SmallVector<AttributeSpec, 8> Attributes;
public:
- enum { InvalidCode = 0 };
- DWARFAbbreviationDeclaration()
- : Code(InvalidCode), Tag(0), HasChildren(0) {}
+ DWARFAbbreviationDeclaration();
uint32_t getCode() const { return Code; }
uint32_t getTag() const { return Tag; }
bool hasChildren() const { return HasChildren; }
- uint32_t getNumAttributes() const { return Attribute.size(); }
+ uint32_t getNumAttributes() const { return Attributes.size(); }
uint16_t getAttrByIndex(uint32_t idx) const {
- return Attribute.size() > idx ? Attribute[idx].getAttribute() : 0;
+ return idx < Attributes.size() ? Attributes[idx].Attr : 0;
}
uint16_t getFormByIndex(uint32_t idx) const {
- return Attribute.size() > idx ? Attribute[idx].getForm() : 0;
+ return idx < Attributes.size() ? Attributes[idx].Form : 0;
}
uint32_t findAttributeIndex(uint16_t attr) const;
- bool extract(DataExtractor data, uint32_t* offset_ptr);
- bool extract(DataExtractor data, uint32_t* offset_ptr, uint32_t code);
- bool isValid() const { return Code != 0 && Tag != 0; }
+ bool extract(DataExtractor Data, uint32_t* OffsetPtr);
void dump(raw_ostream &OS) const;
- const SmallVectorImpl<DWARFAttribute> &getAttributes() const {
- return Attribute;
- }
+
+private:
+ void clear();
};
}
diff --git a/lib/DebugInfo/DWARFAttribute.h b/lib/DebugInfo/DWARFAttribute.h
deleted file mode 100644
index 6f49b63..0000000
--- a/lib/DebugInfo/DWARFAttribute.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- DWARFAttribute.h ----------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H
-#define LLVM_DEBUGINFO_DWARFATTRIBUTE_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class DWARFAttribute {
- uint16_t Attribute;
- uint16_t Form;
- public:
- DWARFAttribute(uint16_t attr, uint16_t form)
- : Attribute(attr), Form(form) {}
-
- uint16_t getAttribute() const { return Attribute; }
- uint16_t getForm() const { return Form; }
-};
-
-}
-
-#endif
diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp
index 93b1622..33869d8 100644
--- a/lib/DebugInfo/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -8,96 +8,18 @@
//===----------------------------------------------------------------------===//
#include "DWARFCompileUnit.h"
-#include "DWARFContext.h"
-#include "llvm/DebugInfo/DWARFFormValue.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-using namespace dwarf;
-
-DataExtractor DWARFCompileUnit::getDebugInfoExtractor() const {
- return DataExtractor(InfoSection, isLittleEndian, AddrSize);
-}
-
-bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
- clear();
-
- Offset = *offset_ptr;
-
- if (debug_info.isValidOffset(*offset_ptr)) {
- uint64_t abbrOffset;
- Length = debug_info.getU32(offset_ptr);
- Version = debug_info.getU16(offset_ptr);
- abbrOffset = debug_info.getU32(offset_ptr);
- AddrSize = debug_info.getU8(offset_ptr);
-
- bool lengthOK = debug_info.isValidOffset(getNextCompileUnitOffset()-1);
- bool versionOK = DWARFContext::isSupportedVersion(Version);
- bool abbrOffsetOK = AbbrevSection.size() > abbrOffset;
- bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
-
- if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && Abbrev != NULL) {
- Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset);
- return true;
- }
-
- // reset the offset to where we tried to parse from if anything went wrong
- *offset_ptr = Offset;
- }
-
- return false;
-}
-uint32_t
-DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data,
- const DWARFAbbreviationDeclarationSet *abbrevs) {
- clear();
-
- Offset = offset;
-
- if (debug_info_data.isValidOffset(offset)) {
- Length = debug_info_data.getU32(&offset);
- Version = debug_info_data.getU16(&offset);
- bool abbrevsOK = debug_info_data.getU32(&offset) == abbrevs->getOffset();
- Abbrevs = abbrevs;
- AddrSize = debug_info_data.getU8(&offset);
-
- bool versionOK = DWARFContext::isSupportedVersion(Version);
- bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
-
- if (versionOK && addrSizeOK && abbrevsOK &&
- debug_info_data.isValidOffset(offset))
- return offset;
- }
- return 0;
-}
-
-bool DWARFCompileUnit::extractRangeList(uint32_t RangeListOffset,
- DWARFDebugRangeList &RangeList) const {
- // Require that compile unit is extracted.
- assert(DieArray.size() > 0);
- DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize);
- return RangeList.extract(RangesData, &RangeListOffset);
-}
-
-void DWARFCompileUnit::clear() {
- Offset = 0;
- Length = 0;
- Version = 0;
- Abbrevs = 0;
- AddrSize = 0;
- BaseAddr = 0;
- clearDIEs(false);
-}
+using namespace llvm;
void DWARFCompileUnit::dump(raw_ostream &OS) {
- OS << format("0x%08x", Offset) << ": Compile Unit:"
- << " length = " << format("0x%08x", Length)
- << " version = " << format("0x%04x", Version)
- << " abbr_offset = " << format("0x%04x", Abbrevs->getOffset())
- << " addr_size = " << format("0x%02x", AddrSize)
- << " (next CU at " << format("0x%08x", getNextCompileUnitOffset())
+ OS << format("0x%08x", getOffset()) << ": Compile Unit:"
+ << " length = " << format("0x%08x", getLength())
+ << " version = " << format("0x%04x", getVersion())
+ << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
+ << " addr_size = " << format("0x%02x", getAddressByteSize())
+ << " (next unit at " << format("0x%08x", getNextUnitOffset())
<< ")\n";
const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false);
@@ -105,174 +27,6 @@ void DWARFCompileUnit::dump(raw_ostream &OS) {
CU->dump(OS, this, -1U);
}
-const char *DWARFCompileUnit::getCompilationDir() {
- extractDIEsIfNeeded(true);
- if (DieArray.empty())
- return 0;
- return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0);
-}
-
-void DWARFCompileUnit::setDIERelations() {
- if (DieArray.empty())
- return;
- DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front();
- DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back();
- DWARFDebugInfoEntryMinimal *curr_die;
- // We purposely are skipping the last element in the array in the loop below
- // so that we can always have a valid next item
- for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) {
- // Since our loop doesn't include the last element, we can always
- // safely access the next die in the array.
- DWARFDebugInfoEntryMinimal *next_die = curr_die + 1;
-
- const DWARFAbbreviationDeclaration *curr_die_abbrev =
- curr_die->getAbbreviationDeclarationPtr();
-
- if (curr_die_abbrev) {
- // Normal DIE
- if (curr_die_abbrev->hasChildren())
- next_die->setParent(curr_die);
- else
- curr_die->setSibling(next_die);
- } else {
- // NULL DIE that terminates a sibling chain
- DWARFDebugInfoEntryMinimal *parent = curr_die->getParent();
- if (parent)
- parent->setSibling(next_die);
- }
- }
-
- // Since we skipped the last element, we need to fix it up!
- if (die_array_begin < die_array_end)
- curr_die->setParent(die_array_begin);
-}
-
-void DWARFCompileUnit::extractDIEsToVector(
- bool AppendCUDie, bool AppendNonCUDies,
- std::vector<DWARFDebugInfoEntryMinimal> &Dies) const {
- if (!AppendCUDie && !AppendNonCUDies)
- return;
-
- // Set the offset to that of the first DIE and calculate the start of the
- // next compilation unit header.
- uint32_t Offset = getFirstDIEOffset();
- uint32_t NextCUOffset = getNextCompileUnitOffset();
- DWARFDebugInfoEntryMinimal DIE;
- uint32_t Depth = 0;
- const uint8_t *FixedFormSizes =
- DWARFFormValue::getFixedFormSizes(getAddressByteSize(), getVersion());
- bool IsCUDie = true;
-
- while (Offset < NextCUOffset &&
- DIE.extractFast(this, FixedFormSizes, &Offset)) {
- if (IsCUDie) {
- if (AppendCUDie)
- Dies.push_back(DIE);
- if (!AppendNonCUDies)
- break;
- // The average bytes per DIE entry has been seen to be
- // around 14-20 so let's pre-reserve the needed memory for
- // our DIE entries accordingly.
- Dies.reserve(Dies.size() + getDebugInfoSize() / 14);
- IsCUDie = false;
- } else {
- Dies.push_back(DIE);
- }
-
- const DWARFAbbreviationDeclaration *AbbrDecl =
- DIE.getAbbreviationDeclarationPtr();
- if (AbbrDecl) {
- // Normal DIE
- if (AbbrDecl->hasChildren())
- ++Depth;
- } else {
- // NULL DIE.
- if (Depth > 0)
- --Depth;
- if (Depth == 0)
- break; // We are done with this compile unit!
- }
- }
-
- // Give a little bit of info if we encounter corrupt DWARF (our offset
- // should always terminate at or before the start of the next compilation
- // unit header).
- if (Offset > NextCUOffset)
- fprintf(stderr, "warning: DWARF compile unit extends beyond its "
- "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), Offset);
-}
-
-size_t DWARFCompileUnit::extractDIEsIfNeeded(bool CUDieOnly) {
- if ((CUDieOnly && DieArray.size() > 0) ||
- DieArray.size() > 1)
- return 0; // Already parsed.
-
- extractDIEsToVector(DieArray.empty(), !CUDieOnly, DieArray);
-
- // Set the base address of current compile unit.
- if (!DieArray.empty()) {
- uint64_t BaseAddr =
- DieArray[0].getAttributeValueAsUnsigned(this, DW_AT_low_pc, -1U);
- if (BaseAddr == -1U)
- BaseAddr = DieArray[0].getAttributeValueAsUnsigned(this, DW_AT_entry_pc, 0);
- setBaseAddress(BaseAddr);
- }
-
- setDIERelations();
- return DieArray.size();
-}
-
-void DWARFCompileUnit::clearDIEs(bool KeepCUDie) {
- if (DieArray.size() > (unsigned)KeepCUDie) {
- // std::vectors never get any smaller when resized to a smaller size,
- // or when clear() or erase() are called, the size will report that it
- // is smaller, but the memory allocated remains intact (call capacity()
- // to see this). So we need to create a temporary vector and swap the
- // contents which will cause just the internal pointers to be swapped
- // so that when temporary vector goes out of scope, it will destroy the
- // contents.
- std::vector<DWARFDebugInfoEntryMinimal> TmpArray;
- DieArray.swap(TmpArray);
- // Save at least the compile unit DIE
- if (KeepCUDie)
- DieArray.push_back(TmpArray.front());
- }
-}
-
-void
-DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
- bool clear_dies_if_already_not_parsed){
- // This function is usually called if there in no .debug_aranges section
- // in order to produce a compile unit level set of address ranges that
- // is accurate. If the DIEs weren't parsed, then we don't want all dies for
- // all compile units to stay loaded when they weren't needed. So we can end
- // up parsing the DWARF and then throwing them all away to keep memory usage
- // down.
- const bool clear_dies = extractDIEsIfNeeded(false) > 1 &&
- clear_dies_if_already_not_parsed;
- DieArray[0].buildAddressRangeTable(this, debug_aranges);
-
- // Keep memory down by clearing DIEs if this generate function
- // caused them to be parsed.
- if (clear_dies)
- clearDIEs(true);
-}
-
-DWARFDebugInfoEntryInlinedChain
-DWARFCompileUnit::getInlinedChainForAddress(uint64_t Address) {
- // First, find a subprogram that contains the given address (the root
- // of inlined chain).
- extractDIEsIfNeeded(false);
- const DWARFDebugInfoEntryMinimal *SubprogramDIE = 0;
- for (size_t i = 0, n = DieArray.size(); i != n; i++) {
- if (DieArray[i].isSubprogramDIE() &&
- DieArray[i].addressRangeContainsAddress(this, Address)) {
- SubprogramDIE = &DieArray[i];
- break;
- }
- }
- // Get inlined chain rooted at this subprogram DIE.
- if (!SubprogramDIE)
- return DWARFDebugInfoEntryInlinedChain();
- return SubprogramDIE->getInlinedChainForAddress(this, Address);
+// VTable anchor.
+DWARFCompileUnit::~DWARFCompileUnit() {
}
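
The clearDIEs() body deleted above (presumably relocated to the new shared DWARFUnit implementation) leaned on a standard idiom its comment described: clear() and resize() never shrink a vector's allocation, so the storage is released by swapping with a temporary. A minimal sketch:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> DieArray(1 << 20, 7);
      DieArray.clear();                       // size() is now 0, capacity() unchanged
      std::printf("after clear: capacity=%zu\n", DieArray.capacity());
      std::vector<int>().swap(DieArray);      // swapping with a temporary frees the storage
      std::printf("after swap:  capacity=%zu\n", DieArray.capacity());
      return 0;
    }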
diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h
index dc2214b..1c9573b 100644
--- a/lib/DebugInfo/DWARFCompileUnit.h
+++ b/lib/DebugInfo/DWARFCompileUnit.h
@@ -10,118 +10,19 @@
#ifndef LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
#define LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
-#include "DWARFDebugAbbrev.h"
-#include "DWARFDebugInfoEntry.h"
-#include "DWARFDebugRangeList.h"
-#include "DWARFRelocMap.h"
-#include <vector>
+#include "DWARFUnit.h"
namespace llvm {
-class DWARFDebugAbbrev;
-class StringRef;
-class raw_ostream;
-
-class DWARFCompileUnit {
- const DWARFDebugAbbrev *Abbrev;
- StringRef InfoSection;
- StringRef AbbrevSection;
- StringRef RangeSection;
- StringRef StringSection;
- StringRef StringOffsetSection;
- StringRef AddrOffsetSection;
- const RelocAddrMap *RelocMap;
- bool isLittleEndian;
-
- uint32_t Offset;
- uint32_t Length;
- uint16_t Version;
- const DWARFAbbreviationDeclarationSet *Abbrevs;
- uint8_t AddrSize;
- uint64_t BaseAddr;
- // The compile unit debug information entry items.
- std::vector<DWARFDebugInfoEntryMinimal> DieArray;
+class DWARFCompileUnit : public DWARFUnit {
public:
-
DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
- const RelocAddrMap *M, bool LE) :
- Abbrev(DA), InfoSection(IS), AbbrevSection(AS),
- RangeSection(RS), StringSection(SS), StringOffsetSection(SOS),
- AddrOffsetSection(AOS), RelocMap(M), isLittleEndian(LE) {
- clear();
- }
-
- StringRef getStringSection() const { return StringSection; }
- StringRef getStringOffsetSection() const { return StringOffsetSection; }
- StringRef getAddrOffsetSection() const { return AddrOffsetSection; }
- const RelocAddrMap *getRelocMap() const { return RelocMap; }
- DataExtractor getDebugInfoExtractor() const;
-
- bool extract(DataExtractor debug_info, uint32_t* offset_ptr);
- uint32_t extract(uint32_t offset, DataExtractor debug_info_data,
- const DWARFAbbreviationDeclarationSet *abbrevs);
-
- /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
- /// hasn't already been done. Returns the number of DIEs parsed at this call.
- size_t extractDIEsIfNeeded(bool CUDieOnly);
- /// extractRangeList - extracts the range list referenced by this compile
- /// unit from .debug_ranges section. Returns true on success.
- /// Requires that compile unit is already extracted.
- bool extractRangeList(uint32_t RangeListOffset,
- DWARFDebugRangeList &RangeList) const;
- void clear();
+ const RelocAddrMap *M, bool LE)
+ : DWARFUnit(DA, IS, AS, RS, SS, SOS, AOS, M, LE) {}
void dump(raw_ostream &OS);
- uint32_t getOffset() const { return Offset; }
- /// Size in bytes of the compile unit header.
- uint32_t getSize() const { return 11; }
- bool containsDIEOffset(uint32_t die_offset) const {
- return die_offset >= getFirstDIEOffset() &&
- die_offset < getNextCompileUnitOffset();
- }
- uint32_t getFirstDIEOffset() const { return Offset + getSize(); }
- uint32_t getNextCompileUnitOffset() const { return Offset + Length + 4; }
- /// Size in bytes of the .debug_info data associated with this compile unit.
- size_t getDebugInfoSize() const { return Length + 4 - getSize(); }
- uint32_t getLength() const { return Length; }
- uint16_t getVersion() const { return Version; }
- const DWARFAbbreviationDeclarationSet *getAbbreviations() const {
- return Abbrevs;
- }
- uint8_t getAddressByteSize() const { return AddrSize; }
- uint64_t getBaseAddress() const { return BaseAddr; }
-
- void setBaseAddress(uint64_t base_addr) {
- BaseAddr = base_addr;
- }
-
- const DWARFDebugInfoEntryMinimal *
- getCompileUnitDIE(bool extract_cu_die_only = true) {
- extractDIEsIfNeeded(extract_cu_die_only);
- return DieArray.empty() ? NULL : &DieArray[0];
- }
-
- const char *getCompilationDir();
-
- /// setDIERelations - We read in all of the DIE entries into our flat list
- /// of DIE entries and now we need to go back through all of them and set the
- /// parent, sibling and child pointers for quick DIE navigation.
- void setDIERelations();
-
- void buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
- bool clear_dies_if_already_not_parsed);
-
- /// getInlinedChainForAddress - fetches inlined chain for a given address.
- /// Returns empty chain if there is no subprogram containing address. The
- /// chain is valid as long as parsed compile unit DIEs are not cleared.
- DWARFDebugInfoEntryInlinedChain getInlinedChainForAddress(uint64_t Address);
-
-private:
- /// extractDIEsToVector - Appends all parsed DIEs to a vector.
- void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
- std::vector<DWARFDebugInfoEntryMinimal> &DIEs) const;
- /// clearDIEs - Clear parsed DIEs to keep memory usage low.
- void clearDIEs(bool KeepCUDie);
+ // VTable anchor.
+ ~DWARFCompileUnit() LLVM_OVERRIDE;
};
}
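
The out-of-line destructor marked "VTable anchor" above is an instance of a common idiom, sketched below with invented names: giving a polymorphic class one non-inline virtual member gives its vtable (and, on common ABIs, its type info) a single home object file, instead of weak copies being emitted in every translation unit that uses the class.

    #include <cstdio>

    struct Widget {
      virtual ~Widget();                      // declared, but deliberately not defined inline
      virtual int kind() const { return 1; }
    };

    // Defined out of line in exactly one .cpp file; that file is where the vtable
    // is emitted. DWARFCompileUnit::~DWARFCompileUnit plays the same role above.
    Widget::~Widget() {}

    int main() {
      Widget W;
      std::printf("kind=%d\n", W.kind());
      return 0;
    }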
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index 51ad645..e477190 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -23,6 +23,41 @@ using namespace object;
typedef DWARFDebugLine::LineTable DWARFLineTable;
+DWARFContext::~DWARFContext() {
+ DeleteContainerPointers(CUs);
+ DeleteContainerPointers(TUs);
+ DeleteContainerPointers(DWOCUs);
+}
+
+static void dumpPubSection(raw_ostream &OS, StringRef Name, StringRef Data,
+ bool LittleEndian, bool GnuStyle) {
+ OS << "\n." << Name << " contents:\n";
+ DataExtractor pubNames(Data, LittleEndian, 0);
+ uint32_t offset = 0;
+ OS << "length = " << format("0x%08x", pubNames.getU32(&offset));
+ OS << " version = " << format("0x%04x", pubNames.getU16(&offset));
+ OS << " unit_offset = " << format("0x%08x", pubNames.getU32(&offset));
+ OS << " unit_size = " << format("0x%08x", pubNames.getU32(&offset)) << '\n';
+ if (GnuStyle)
+ OS << "Offset Linkage Kind Name\n";
+ else
+ OS << "Offset Name\n";
+
+ while (offset < Data.size()) {
+ uint32_t dieRef = pubNames.getU32(&offset);
+ if (dieRef == 0)
+ break;
+ OS << format("0x%8.8x ", dieRef);
+ if (GnuStyle) {
+ PubIndexEntryDescriptor desc(pubNames.getU8(&offset));
+ OS << format("%-8s", dwarf::GDBIndexEntryLinkageString(desc.Linkage))
+ << ' ' << format("%-8s", dwarf::GDBIndexEntryKindString(desc.Kind))
+ << ' ';
+ }
+ OS << '\"' << pubNames.getCStr(&offset) << "\"\n";
+ }
+}
+
void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) {
OS << ".debug_abbrev contents:\n";
@@ -35,8 +70,14 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
getCompileUnitAtIndex(i)->dump(OS);
}
+ if (DumpType == DIDT_All || DumpType == DIDT_Types) {
+ OS << "\n.debug_types contents:\n";
+ for (unsigned i = 0, e = getNumTypeUnits(); i != e; ++i)
+ getTypeUnitAtIndex(i)->dump(OS);
+ }
+
if (DumpType == DIDT_All || DumpType == DIDT_Loc) {
- OS << ".debug_loc contents:\n";
+ OS << "\n.debug_loc contents:\n";
getDebugLoc()->dump(OS);
}
@@ -61,13 +102,13 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
DWARFCompileUnit *cu = getCompileUnitAtIndex(i);
savedAddressByteSize = cu->getAddressByteSize();
unsigned stmtOffset =
- cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
- -1U);
+ cu->getCompileUnitDIE()->getAttributeValueAsSectionOffset(
+ cu, DW_AT_stmt_list, -1U);
if (stmtOffset != -1U) {
- DataExtractor lineData(getLineSection(), isLittleEndian(),
+ DataExtractor lineData(getLineSection().Data, isLittleEndian(),
savedAddressByteSize);
DWARFDebugLine::DumpingState state(OS);
- DWARFDebugLine::parseStatementTable(lineData, &lineRelocMap(), &stmtOffset, state);
+ DWARFDebugLine::parseStatementTable(lineData, &getLineSection().Relocs, &stmtOffset, state);
}
}
}
@@ -97,23 +138,21 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
rangeList.dump(OS);
}
- if (DumpType == DIDT_All || DumpType == DIDT_Pubnames) {
- OS << "\n.debug_pubnames contents:\n";
- DataExtractor pubNames(getPubNamesSection(), isLittleEndian(), 0);
- offset = 0;
- OS << "Length: " << pubNames.getU32(&offset) << "\n";
- OS << "Version: " << pubNames.getU16(&offset) << "\n";
- OS << "Offset in .debug_info: " << pubNames.getU32(&offset) << "\n";
- OS << "Size: " << pubNames.getU32(&offset) << "\n";
- OS << "\n Offset Name\n";
- while (offset < getPubNamesSection().size()) {
- uint32_t n = pubNames.getU32(&offset);
- if (n == 0)
- break;
- OS << format("%8x ", n);
- OS << pubNames.getCStr(&offset) << "\n";
- }
- }
+ if (DumpType == DIDT_All || DumpType == DIDT_Pubnames)
+ dumpPubSection(OS, "debug_pubnames", getPubNamesSection(),
+ isLittleEndian(), false);
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Pubtypes)
+ dumpPubSection(OS, "debug_pubtypes", getPubTypesSection(),
+ isLittleEndian(), false);
+
+ if (DumpType == DIDT_All || DumpType == DIDT_GnuPubnames)
+ dumpPubSection(OS, "debug_gnu_pubnames", getGnuPubNamesSection(),
+ isLittleEndian(), true /* GnuStyle */);
+
+ if (DumpType == DIDT_All || DumpType == DIDT_GnuPubtypes)
+ dumpPubSection(OS, "debug_gnu_pubtypes", getGnuPubTypesSection(),
+ isLittleEndian(), true /* GnuStyle */);
if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) {
const DWARFDebugAbbrev *D = getDebugAbbrevDWO();
@@ -180,8 +219,8 @@ const DWARFDebugLoc *DWARFContext::getDebugLoc() {
if (Loc)
return Loc.get();
- DataExtractor LocData(getLocSection(), isLittleEndian(), 0);
- Loc.reset(new DWARFDebugLoc(locRelocMap()));
+ DataExtractor LocData(getLocSection().Data, isLittleEndian(), 0);
+ Loc.reset(new DWARFDebugLoc(getLocSection().Relocs));
// assume all compile units have the same address byte size
if (getNumCompileUnits())
Loc->parse(LocData, getCompileUnitAtIndex(0)->getAddressByteSize());
@@ -192,13 +231,7 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() {
if (Aranges)
return Aranges.get();
- DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0);
-
Aranges.reset(new DWARFDebugAranges());
- Aranges->extract(arangesData);
- // Generate aranges from DIEs: even if .debug_aranges section is present,
- // it may describe only a small subset of compilation units, so we need to
- // manually build aranges for the rest of them.
Aranges->generate(this);
return Aranges.get();
}
@@ -226,11 +259,11 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
const DWARFLineTable *
DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
if (!Line)
- Line.reset(new DWARFDebugLine(&lineRelocMap()));
+ Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
unsigned stmtOffset =
- cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
- -1U);
+ cu->getCompileUnitDIE()->getAttributeValueAsSectionOffset(
+ cu, DW_AT_stmt_list, -1U);
if (stmtOffset == -1U)
return 0; // No line table for this compile unit.
@@ -239,64 +272,79 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
return lt;
// We have to parse it first.
- DataExtractor lineData(getLineSection(), isLittleEndian(),
+ DataExtractor lineData(getLineSection().Data, isLittleEndian(),
cu->getAddressByteSize());
return Line->getOrParseLineTable(lineData, stmtOffset);
}
void DWARFContext::parseCompileUnits() {
uint32_t offset = 0;
- const DataExtractor &DIData = DataExtractor(getInfoSection(),
+ const DataExtractor &DIData = DataExtractor(getInfoSection().Data,
isLittleEndian(), 0);
while (DIData.isValidOffset(offset)) {
- CUs.push_back(DWARFCompileUnit(getDebugAbbrev(), getInfoSection(),
- getAbbrevSection(), getRangeSection(),
- getStringSection(), StringRef(),
- getAddrSection(),
- &infoRelocMap(),
- isLittleEndian()));
- if (!CUs.back().extract(DIData, &offset)) {
- CUs.pop_back();
+ OwningPtr<DWARFCompileUnit> CU(new DWARFCompileUnit(
+ getDebugAbbrev(), getInfoSection().Data, getAbbrevSection(),
+ getRangeSection(), getStringSection(), StringRef(), getAddrSection(),
+ &getInfoSection().Relocs, isLittleEndian()));
+ if (!CU->extract(DIData, &offset)) {
break;
}
+ CUs.push_back(CU.take());
+ offset = CUs.back()->getNextUnitOffset();
+ }
+}
- offset = CUs.back().getNextCompileUnitOffset();
+void DWARFContext::parseTypeUnits() {
+ const std::map<object::SectionRef, Section> &Sections = getTypesSections();
+ for (std::map<object::SectionRef, Section>::const_iterator
+ I = Sections.begin(),
+ E = Sections.end();
+ I != E; ++I) {
+ uint32_t offset = 0;
+ const DataExtractor &DIData =
+ DataExtractor(I->second.Data, isLittleEndian(), 0);
+ while (DIData.isValidOffset(offset)) {
+ OwningPtr<DWARFTypeUnit> TU(new DWARFTypeUnit(
+ getDebugAbbrev(), I->second.Data, getAbbrevSection(),
+ getRangeSection(), getStringSection(), StringRef(), getAddrSection(),
+ &I->second.Relocs, isLittleEndian()));
+ if (!TU->extract(DIData, &offset))
+ break;
+ TUs.push_back(TU.take());
+ offset = TUs.back()->getNextUnitOffset();
+ }
}
}
void DWARFContext::parseDWOCompileUnits() {
uint32_t offset = 0;
- const DataExtractor &DIData = DataExtractor(getInfoDWOSection(),
- isLittleEndian(), 0);
+ const DataExtractor &DIData =
+ DataExtractor(getInfoDWOSection().Data, isLittleEndian(), 0);
while (DIData.isValidOffset(offset)) {
- DWOCUs.push_back(DWARFCompileUnit(getDebugAbbrevDWO(), getInfoDWOSection(),
- getAbbrevDWOSection(),
- getRangeDWOSection(),
- getStringDWOSection(),
- getStringOffsetDWOSection(),
- getAddrSection(),
- &infoDWORelocMap(),
- isLittleEndian()));
- if (!DWOCUs.back().extract(DIData, &offset)) {
- DWOCUs.pop_back();
+ OwningPtr<DWARFCompileUnit> DWOCU(new DWARFCompileUnit(
+ getDebugAbbrevDWO(), getInfoDWOSection().Data, getAbbrevDWOSection(),
+ getRangeDWOSection(), getStringDWOSection(),
+ getStringOffsetDWOSection(), getAddrSection(),
+ &getInfoDWOSection().Relocs, isLittleEndian()));
+ if (!DWOCU->extract(DIData, &offset)) {
break;
}
-
- offset = DWOCUs.back().getNextCompileUnitOffset();
+ DWOCUs.push_back(DWOCU.take());
+ offset = DWOCUs.back()->getNextUnitOffset();
}
}
namespace {
struct OffsetComparator {
- bool operator()(const DWARFCompileUnit &LHS,
- const DWARFCompileUnit &RHS) const {
- return LHS.getOffset() < RHS.getOffset();
+ bool operator()(const DWARFCompileUnit *LHS,
+ const DWARFCompileUnit *RHS) const {
+ return LHS->getOffset() < RHS->getOffset();
}
- bool operator()(const DWARFCompileUnit &LHS, uint32_t RHS) const {
- return LHS.getOffset() < RHS;
+ bool operator()(const DWARFCompileUnit *LHS, uint32_t RHS) const {
+ return LHS->getOffset() < RHS;
}
- bool operator()(uint32_t LHS, const DWARFCompileUnit &RHS) const {
- return LHS < RHS.getOffset();
+ bool operator()(uint32_t LHS, const DWARFCompileUnit *RHS) const {
+ return LHS < RHS->getOffset();
}
};
}
@@ -305,10 +353,11 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
if (CUs.empty())
parseCompileUnits();
- DWARFCompileUnit *CU = std::lower_bound(CUs.begin(), CUs.end(), Offset,
- OffsetComparator());
- if (CU != CUs.end())
- return &*CU;
+ DWARFCompileUnit **CU =
+ std::lower_bound(CUs.begin(), CUs.end(), Offset, OffsetComparator());
+ if (CU != CUs.end()) {
+ return *CU;
+ }
return 0;
}
@@ -380,7 +429,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
CU->getInlinedChainForAddress(Address);
if (InlinedChain.DIEs.size() > 0) {
const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0];
- if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.CU))
+ if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.U))
FunctionName = Name;
}
}
@@ -413,19 +462,16 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address,
CU->getInlinedChainForAddress(Address);
if (InlinedChain.DIEs.size() > 0) {
const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0];
- if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.CU))
+ if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.U))
FunctionName = Name;
}
}
- StringRef FuncNameRef = StringRef(FunctionName);
-
// If the Specifier says we don't need FileLineInfo, just
// return the top-most function at the starting address.
if (!Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
- Lines.push_back(std::make_pair(Address,
- DILineInfo(StringRef("<invalid>"),
- FuncNameRef, 0, 0)));
+ Lines.push_back(
+ std::make_pair(Address, DILineInfo("<invalid>", FunctionName, 0, 0)));
return Lines;
}
@@ -446,9 +492,8 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address,
std::string FileName = "<invalid>";
getFileNameForCompileUnit(CU, LineTable, Row.File,
NeedsAbsoluteFilePath, FileName);
- Lines.push_back(std::make_pair(Row.Address,
- DILineInfo(StringRef(FileName),
- FuncNameRef, Row.Line, Row.Column)));
+ Lines.push_back(std::make_pair(
+ Row.Address, DILineInfo(FileName, FunctionName, Row.Line, Row.Column)));
}
return Lines;
@@ -476,7 +521,7 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
uint32_t Column = 0;
// Get function name if necessary.
if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
- if (const char *Name = FunctionDIE.getSubroutineName(InlinedChain.CU))
+ if (const char *Name = FunctionDIE.getSubroutineName(InlinedChain.U))
FunctionName = Name;
}
if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
@@ -500,7 +545,7 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
}
// Get call file/line/column of a current DIE.
if (i + 1 < n) {
- FunctionDIE.getCallerFrame(InlinedChain.CU, CallFile, CallLine,
+ FunctionDIE.getCallerFrame(InlinedChain.U, CallFile, CallLine,
CallColumn);
}
}
@@ -557,29 +602,37 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
UncompressedSections.push_back(UncompressedSection.take());
}
- StringRef *Section = StringSwitch<StringRef*>(name)
- .Case("debug_info", &InfoSection)
- .Case("debug_abbrev", &AbbrevSection)
- .Case("debug_loc", &LocSection)
- .Case("debug_line", &LineSection)
- .Case("debug_aranges", &ARangeSection)
- .Case("debug_frame", &DebugFrameSection)
- .Case("debug_str", &StringSection)
- .Case("debug_ranges", &RangeSection)
- .Case("debug_pubnames", &PubNamesSection)
- .Case("debug_info.dwo", &InfoDWOSection)
- .Case("debug_abbrev.dwo", &AbbrevDWOSection)
- .Case("debug_str.dwo", &StringDWOSection)
- .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
- .Case("debug_addr", &AddrSection)
- // Any more debug info sections go here.
- .Default(0);
+ StringRef *Section =
+ StringSwitch<StringRef *>(name)
+ .Case("debug_info", &InfoSection.Data)
+ .Case("debug_abbrev", &AbbrevSection)
+ .Case("debug_loc", &LocSection.Data)
+ .Case("debug_line", &LineSection.Data)
+ .Case("debug_aranges", &ARangeSection)
+ .Case("debug_frame", &DebugFrameSection)
+ .Case("debug_str", &StringSection)
+ .Case("debug_ranges", &RangeSection)
+ .Case("debug_pubnames", &PubNamesSection)
+ .Case("debug_pubtypes", &PubTypesSection)
+ .Case("debug_gnu_pubnames", &GnuPubNamesSection)
+ .Case("debug_gnu_pubtypes", &GnuPubTypesSection)
+ .Case("debug_info.dwo", &InfoDWOSection.Data)
+ .Case("debug_abbrev.dwo", &AbbrevDWOSection)
+ .Case("debug_str.dwo", &StringDWOSection)
+ .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+ .Case("debug_addr", &AddrSection)
+ // Any more debug info sections go here.
+ .Default(0);
if (Section) {
*Section = data;
if (name == "debug_ranges") {
// FIXME: Use the other dwo range section when we emit it.
RangeDWOSection = data;
}
+ } else if (name == "debug_types") {
+ // Find debug_types data by section rather than name as there are
+ // multiple, comdat grouped, debug_types sections.
+ TypesSections[*i].Data = data;
}
section_iterator RelocatedSection = i->getRelocatedSection();
@@ -594,13 +647,18 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
// TODO: Add support for relocations in other sections as needed.
// Record relocations for the debug_info and debug_line sections.
RelocAddrMap *Map = StringSwitch<RelocAddrMap*>(RelSecName)
- .Case("debug_info", &InfoRelocMap)
- .Case("debug_loc", &LocRelocMap)
- .Case("debug_info.dwo", &InfoDWORelocMap)
- .Case("debug_line", &LineRelocMap)
+ .Case("debug_info", &InfoSection.Relocs)
+ .Case("debug_loc", &LocSection.Relocs)
+ .Case("debug_info.dwo", &InfoDWOSection.Relocs)
+ .Case("debug_line", &LineSection.Relocs)
.Default(0);
- if (!Map)
- continue;
+ if (!Map) {
+ if (RelSecName != "debug_types")
+ continue;
+ // Find debug_types relocs by section rather than name as there are
+ // multiple, comdat grouped, debug_types sections.
+ Map = &TypesSections[*RelocatedSection].Relocs;
+ }
if (i->begin_relocations() != i->end_relocations()) {
uint64_t SectionSize;
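
Since compile units are now stored by pointer, OffsetComparator above grows overloads so std::lower_bound can compare a unit pointer directly against a raw offset. A stand-alone sketch of the same pattern (names invented for illustration):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Unit { unsigned Offset; };

    // Mixed-type comparator in the style of OffsetComparator.
    struct ByOffset {
      bool operator()(const Unit *L, const Unit *R) const { return L->Offset < R->Offset; }
      bool operator()(const Unit *L, unsigned R) const { return L->Offset < R; }
      bool operator()(unsigned L, const Unit *R) const { return L < R->Offset; }
    };

    int main() {
      Unit A{0x00}, B{0x40}, C{0x90};
      std::vector<Unit *> Units = {&A, &B, &C};   // kept sorted by Offset
      std::vector<Unit *>::iterator It =
          std::lower_bound(Units.begin(), Units.end(), 0x40u, ByOffset());
      if (It != Units.end())
        std::printf("found unit at offset 0x%x\n", (*It)->Offset);
      return 0;
    }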
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index 5d8f714..03863ab 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -16,6 +16,7 @@
#include "DWARFDebugLine.h"
#include "DWARFDebugLoc.h"
#include "DWARFDebugRangeList.h"
+#include "DWARFTypeUnit.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DIContext.h"
@@ -27,14 +28,15 @@ namespace llvm {
/// information parsing. The actual data is supplied through pure virtual
/// methods that a concrete implementation provides.
class DWARFContext : public DIContext {
- SmallVector<DWARFCompileUnit, 1> CUs;
+ SmallVector<DWARFCompileUnit *, 1> CUs;
+ SmallVector<DWARFTypeUnit *, 1> TUs;
OwningPtr<DWARFDebugAbbrev> Abbrev;
OwningPtr<DWARFDebugLoc> Loc;
OwningPtr<DWARFDebugAranges> Aranges;
OwningPtr<DWARFDebugLine> Line;
OwningPtr<DWARFDebugFrame> DebugFrame;
- SmallVector<DWARFCompileUnit, 1> DWOCUs;
+ SmallVector<DWARFCompileUnit *, 1> DWOCUs;
OwningPtr<DWARFDebugAbbrev> AbbrevDWO;
DWARFContext(DWARFContext &) LLVM_DELETED_FUNCTION;
@@ -43,12 +45,21 @@ class DWARFContext : public DIContext {
/// Read compile units from the debug_info section and store them in CUs.
void parseCompileUnits();
+  /// Read type units from the debug_types sections and store them in TUs.
+ void parseTypeUnits();
+
/// Read compile units from the debug_info.dwo section and store them in
/// DWOCUs.
void parseDWOCompileUnits();
public:
+ struct Section {
+ StringRef Data;
+ RelocAddrMap Relocs;
+ };
+
DWARFContext() : DIContext(CK_DWARF) {}
+ virtual ~DWARFContext();
static bool classof(const DIContext *DICtx) {
return DICtx->getKind() == CK_DWARF;
@@ -63,6 +74,13 @@ public:
return CUs.size();
}
+  /// Get the number of type units in this context.
+ unsigned getNumTypeUnits() {
+ if (TUs.empty())
+ parseTypeUnits();
+ return TUs.size();
+ }
+
/// Get the number of compile units in the DWO context.
unsigned getNumDWOCompileUnits() {
if (DWOCUs.empty())
@@ -74,14 +92,21 @@ public:
DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) {
if (CUs.empty())
parseCompileUnits();
- return &CUs[index];
+ return CUs[index];
+ }
+
+  /// Get the type unit at the specified index for this context.
+ DWARFTypeUnit *getTypeUnitAtIndex(unsigned index) {
+ if (TUs.empty())
+ parseTypeUnits();
+ return TUs[index];
}
/// Get the compile unit at the specified index for the DWO compile units.
DWARFCompileUnit *getDWOCompileUnitAtIndex(unsigned index) {
if (DWOCUs.empty())
parseDWOCompileUnits();
- return &DWOCUs[index];
+ return DWOCUs[index];
}
/// Get a pointer to the parsed DebugAbbrev object.
@@ -112,27 +137,27 @@ public:
virtual bool isLittleEndian() const = 0;
virtual uint8_t getAddressSize() const = 0;
- virtual const RelocAddrMap &infoRelocMap() const = 0;
- virtual const RelocAddrMap &lineRelocMap() const = 0;
- virtual const RelocAddrMap &locRelocMap() const = 0;
- virtual StringRef getInfoSection() = 0;
+ virtual const Section &getInfoSection() = 0;
+ virtual const std::map<object::SectionRef, Section> &getTypesSections() = 0;
virtual StringRef getAbbrevSection() = 0;
- virtual StringRef getLocSection() = 0;
+ virtual const Section &getLocSection() = 0;
virtual StringRef getARangeSection() = 0;
virtual StringRef getDebugFrameSection() = 0;
- virtual StringRef getLineSection() = 0;
+ virtual const Section &getLineSection() = 0;
virtual StringRef getStringSection() = 0;
virtual StringRef getRangeSection() = 0;
virtual StringRef getPubNamesSection() = 0;
+ virtual StringRef getPubTypesSection() = 0;
+ virtual StringRef getGnuPubNamesSection() = 0;
+ virtual StringRef getGnuPubTypesSection() = 0;
// Sections for DWARF5 split dwarf proposal.
- virtual StringRef getInfoDWOSection() = 0;
+ virtual const Section &getInfoDWOSection() = 0;
virtual StringRef getAbbrevDWOSection() = 0;
virtual StringRef getStringDWOSection() = 0;
virtual StringRef getStringOffsetDWOSection() = 0;
virtual StringRef getRangeDWOSection() = 0;
virtual StringRef getAddrSection() = 0;
- virtual const RelocAddrMap &infoDWORelocMap() const = 0;
static bool isSupportedVersion(unsigned version) {
return version == 2 || version == 3 || version == 4;
@@ -153,22 +178,22 @@ class DWARFContextInMemory : public DWARFContext {
virtual void anchor();
bool IsLittleEndian;
uint8_t AddressSize;
- RelocAddrMap InfoRelocMap;
- RelocAddrMap LocRelocMap;
- RelocAddrMap LineRelocMap;
- StringRef InfoSection;
+ Section InfoSection;
+ std::map<object::SectionRef, Section> TypesSections;
StringRef AbbrevSection;
- StringRef LocSection;
+ Section LocSection;
StringRef ARangeSection;
StringRef DebugFrameSection;
- StringRef LineSection;
+ Section LineSection;
StringRef StringSection;
StringRef RangeSection;
StringRef PubNamesSection;
+ StringRef PubTypesSection;
+ StringRef GnuPubNamesSection;
+ StringRef GnuPubTypesSection;
// Sections for DWARF5 split dwarf proposal.
- RelocAddrMap InfoDWORelocMap;
- StringRef InfoDWOSection;
+ Section InfoDWOSection;
StringRef AbbrevDWOSection;
StringRef StringDWOSection;
StringRef StringOffsetDWOSection;
@@ -182,21 +207,24 @@ public:
~DWARFContextInMemory();
virtual bool isLittleEndian() const { return IsLittleEndian; }
virtual uint8_t getAddressSize() const { return AddressSize; }
- virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; }
- virtual const RelocAddrMap &locRelocMap() const { return LocRelocMap; }
- virtual const RelocAddrMap &lineRelocMap() const { return LineRelocMap; }
- virtual StringRef getInfoSection() { return InfoSection; }
+ virtual const Section &getInfoSection() { return InfoSection; }
+ virtual const std::map<object::SectionRef, Section> &getTypesSections() {
+ return TypesSections;
+ }
virtual StringRef getAbbrevSection() { return AbbrevSection; }
- virtual StringRef getLocSection() { return LocSection; }
+ virtual const Section &getLocSection() { return LocSection; }
virtual StringRef getARangeSection() { return ARangeSection; }
virtual StringRef getDebugFrameSection() { return DebugFrameSection; }
- virtual StringRef getLineSection() { return LineSection; }
+ virtual const Section &getLineSection() { return LineSection; }
virtual StringRef getStringSection() { return StringSection; }
virtual StringRef getRangeSection() { return RangeSection; }
virtual StringRef getPubNamesSection() { return PubNamesSection; }
+ virtual StringRef getPubTypesSection() { return PubTypesSection; }
+ virtual StringRef getGnuPubNamesSection() { return GnuPubNamesSection; }
+ virtual StringRef getGnuPubTypesSection() { return GnuPubTypesSection; }
// Sections for DWARF5 split dwarf proposal.
- virtual StringRef getInfoDWOSection() { return InfoDWOSection; }
+ virtual const Section &getInfoDWOSection() { return InfoDWOSection; }
virtual StringRef getAbbrevDWOSection() { return AbbrevDWOSection; }
virtual StringRef getStringDWOSection() { return StringDWOSection; }
virtual StringRef getStringOffsetDWOSection() {
@@ -206,9 +234,6 @@ public:
virtual StringRef getAddrSection() {
return AddrSection;
}
- virtual const RelocAddrMap &infoDWORelocMap() const {
- return InfoDWORelocMap;
- }
};
}
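Taken together, the DWARFContext.h changes replace the per-section RelocAddrMap accessors with a small Section struct (raw bytes plus relocations) and add lazy type-unit parsing alongside the existing compile-unit parsing. A minimal usage sketch, assuming the in-tree lib/DebugInfo headers are on the include path and that Ctx is an already-constructed DWARFContextInMemory; the function and variable names are illustrative, not part of the patch:

    #include "DWARFContext.h"
    #include "DWARFTypeUnit.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void dumpAllTypeUnits(DWARFContext &Ctx, raw_ostream &OS) {
      // Parsing is lazy: getNumTypeUnits() triggers parseTypeUnits() on first use.
      for (unsigned i = 0, n = Ctx.getNumTypeUnits(); i != n; ++i) {
        DWARFTypeUnit *TU = Ctx.getTypeUnitAtIndex(i);
        TU->dump(OS); // dump() is declared in DWARFTypeUnit.h further down
      }
    }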
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp
index 7dff9ff..229376e 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -20,32 +20,6 @@ void DWARFDebugArangeSet::clear() {
ArangeDescriptors.clear();
}
-void DWARFDebugArangeSet::compact() {
- if (ArangeDescriptors.empty())
- return;
-
- // Iterate through all arange descriptors and combine any ranges that
- // overlap or have matching boundaries. The ArangeDescriptors are assumed
- // to be in ascending order.
- uint32_t i = 0;
- while (i + 1 < ArangeDescriptors.size()) {
- if (ArangeDescriptors[i].getEndAddress() >= ArangeDescriptors[i+1].Address){
- // The current range ends at or exceeds the start of the next address
- // range. Compute the max end address between the two and use that to
- // make the new length.
- const uint64_t max_end_addr =
- std::max(ArangeDescriptors[i].getEndAddress(),
- ArangeDescriptors[i+1].getEndAddress());
- ArangeDescriptors[i].Length = max_end_addr - ArangeDescriptors[i].Address;
- // Now remove the next entry as it was just combined with the previous one
- ArangeDescriptors.erase(ArangeDescriptors.begin()+i+1);
- } else {
- // Discontiguous address range, just proceed to the next one.
- ++i;
- }
- }
-}
-
bool
DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
if (data.isValidOffset(*offset_ptr)) {
@@ -126,26 +100,3 @@ void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
<< format(" 0x%*.*" PRIx64 ")\n",
hex_width, hex_width, pos->getEndAddress());
}
-
-
-namespace {
- class DescriptorContainsAddress {
- const uint64_t Address;
- public:
- DescriptorContainsAddress(uint64_t address) : Address(address) {}
- bool operator()(const DWARFDebugArangeSet::Descriptor &desc) const {
- return Address >= desc.Address && Address < (desc.Address + desc.Length);
- }
- };
-}
-
-uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const {
- DescriptorConstIter end = ArangeDescriptors.end();
- DescriptorConstIter pos =
- std::find_if(ArangeDescriptors.begin(), end, // Range
- DescriptorContainsAddress(address)); // Predicate
- if (pos != end)
- return HeaderData.CuOffset;
-
- return -1U;
-}
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h
index d768676..49a7132 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.h
+++ b/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -44,7 +44,6 @@ public:
private:
typedef std::vector<Descriptor> DescriptorColl;
- typedef DescriptorColl::iterator DescriptorIter;
typedef DescriptorColl::const_iterator DescriptorConstIter;
uint32_t Offset;
@@ -54,15 +53,11 @@ private:
public:
DWARFDebugArangeSet() { clear(); }
void clear();
- void compact();
bool extract(DataExtractor data, uint32_t *offset_ptr);
void dump(raw_ostream &OS) const;
uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
- uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; }
- uint32_t findAddress(uint64_t address) const;
uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); }
- const struct Header &getHeader() const { return HeaderData; }
const Descriptor *getDescriptor(uint32_t i) const {
if (i < ArangeDescriptors.size())
return &ArangeDescriptors[i];
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index f79862d..591d4bd 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -16,128 +16,79 @@
#include <cassert>
using namespace llvm;
-// Compare function DWARFDebugAranges::Range structures
-static bool RangeLessThan(const DWARFDebugAranges::Range &range1,
- const DWARFDebugAranges::Range &range2) {
- return range1.LoPC < range2.LoPC;
-}
-
-namespace {
- class CountArangeDescriptors {
- public:
- CountArangeDescriptors(uint32_t &count_ref) : Count(count_ref) {}
- void operator()(const DWARFDebugArangeSet &Set) {
- Count += Set.getNumDescriptors();
- }
- uint32_t &Count;
- };
-
- class AddArangeDescriptors {
- public:
- AddArangeDescriptors(DWARFDebugAranges::RangeColl &Ranges,
- DWARFDebugAranges::ParsedCUOffsetColl &CUOffsets)
- : RangeCollection(Ranges),
- CUOffsetCollection(CUOffsets) {}
- void operator()(const DWARFDebugArangeSet &Set) {
- DWARFDebugAranges::Range Range;
- Range.Offset = Set.getCompileUnitDIEOffset();
- CUOffsetCollection.insert(Range.Offset);
-
- for (uint32_t i = 0, n = Set.getNumDescriptors(); i < n; ++i) {
- const DWARFDebugArangeSet::Descriptor *ArangeDescPtr =
- Set.getDescriptor(i);
- Range.LoPC = ArangeDescPtr->Address;
- Range.Length = ArangeDescPtr->Length;
-
- // Insert each item in increasing address order so binary searching
- // can later be done!
- DWARFDebugAranges::RangeColl::iterator InsertPos =
- std::lower_bound(RangeCollection.begin(), RangeCollection.end(),
- Range, RangeLessThan);
- RangeCollection.insert(InsertPos, Range);
- }
-
- }
- DWARFDebugAranges::RangeColl &RangeCollection;
- DWARFDebugAranges::ParsedCUOffsetColl &CUOffsetCollection;
- };
-}
-
-bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) {
- if (debug_aranges_data.isValidOffset(0)) {
- uint32_t offset = 0;
-
- typedef std::vector<DWARFDebugArangeSet> SetCollection;
- SetCollection sets;
-
- DWARFDebugArangeSet set;
- Range range;
- while (set.extract(debug_aranges_data, &offset))
- sets.push_back(set);
-
- uint32_t count = 0;
-
- std::for_each(sets.begin(), sets.end(), CountArangeDescriptors(count));
-
- if (count > 0) {
- Aranges.reserve(count);
- AddArangeDescriptors range_adder(Aranges, ParsedCUOffsets);
- std::for_each(sets.begin(), sets.end(), range_adder);
- }
+void DWARFDebugAranges::extract(DataExtractor DebugArangesData) {
+ if (!DebugArangesData.isValidOffset(0))
+ return;
+ uint32_t Offset = 0;
+ typedef std::vector<DWARFDebugArangeSet> RangeSetColl;
+ RangeSetColl Sets;
+ DWARFDebugArangeSet Set;
+ uint32_t TotalRanges = 0;
+
+ while (Set.extract(DebugArangesData, &Offset)) {
+ Sets.push_back(Set);
+ TotalRanges += Set.getNumDescriptors();
}
- return false;
-}
+ if (TotalRanges == 0)
+ return;
-bool DWARFDebugAranges::generate(DWARFContext *ctx) {
- if (ctx) {
- const uint32_t num_compile_units = ctx->getNumCompileUnits();
- for (uint32_t cu_idx = 0; cu_idx < num_compile_units; ++cu_idx) {
- if (DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx)) {
- uint32_t CUOffset = cu->getOffset();
- if (ParsedCUOffsets.insert(CUOffset).second)
- cu->buildAddressRangeTable(this, true);
- }
+ Aranges.reserve(TotalRanges);
+ for (RangeSetColl::const_iterator I = Sets.begin(), E = Sets.end(); I != E;
+ ++I) {
+ uint32_t CUOffset = I->getCompileUnitDIEOffset();
+
+ for (uint32_t i = 0, n = I->getNumDescriptors(); i < n; ++i) {
+ const DWARFDebugArangeSet::Descriptor *ArangeDescPtr =
+ I->getDescriptor(i);
+ uint64_t LowPC = ArangeDescPtr->Address;
+ uint64_t HighPC = LowPC + ArangeDescPtr->Length;
+ appendRange(CUOffset, LowPC, HighPC);
}
}
- sort(true, /* overlap size */ 0);
- return !isEmpty();
}
-void DWARFDebugAranges::dump(raw_ostream &OS) const {
- const uint32_t num_ranges = getNumRanges();
- for (uint32_t i = 0; i < num_ranges; ++i) {
- const Range &range = Aranges[i];
- OS << format("0x%8.8x: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n",
- range.Offset, (uint64_t)range.LoPC, (uint64_t)range.HiPC());
+void DWARFDebugAranges::generate(DWARFContext *CTX) {
+ clear();
+ if (!CTX)
+ return;
+
+ // Extract aranges from .debug_aranges section.
+ DataExtractor ArangesData(CTX->getARangeSection(), CTX->isLittleEndian(), 0);
+ extract(ArangesData);
+
+ // Generate aranges from DIEs: even if .debug_aranges section is present,
+ // it may describe only a small subset of compilation units, so we need to
+ // manually build aranges for the rest of them.
+ for (uint32_t i = 0, n = CTX->getNumCompileUnits(); i < n; ++i) {
+ if (DWARFCompileUnit *CU = CTX->getCompileUnitAtIndex(i)) {
+ uint32_t CUOffset = CU->getOffset();
+ if (ParsedCUOffsets.insert(CUOffset).second)
+ CU->buildAddressRangeTable(this, true, CUOffset);
+ }
}
-}
-void DWARFDebugAranges::Range::dump(raw_ostream &OS) const {
- OS << format("{0x%8.8x}: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n",
- Offset, LoPC, HiPC());
+ sortAndMinimize();
}
-void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc,
- uint64_t high_pc) {
+void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC,
+ uint64_t HighPC) {
if (!Aranges.empty()) {
- if (Aranges.back().Offset == offset && Aranges.back().HiPC() == low_pc) {
- Aranges.back().setHiPC(high_pc);
+ if (Aranges.back().CUOffset == CUOffset &&
+ Aranges.back().HighPC() == LowPC) {
+ Aranges.back().setHighPC(HighPC);
return;
}
}
- Aranges.push_back(Range(low_pc, high_pc, offset));
+ Aranges.push_back(Range(LowPC, HighPC, CUOffset));
}
-void DWARFDebugAranges::sort(bool minimize, uint32_t n) {
+void DWARFDebugAranges::sortAndMinimize() {
const size_t orig_arange_size = Aranges.size();
// Size of one? If so, no sorting is needed
if (orig_arange_size <= 1)
return;
// Sort our address range entries
- std::stable_sort(Aranges.begin(), Aranges.end(), RangeLessThan);
-
- if (!minimize)
- return;
+ std::stable_sort(Aranges.begin(), Aranges.end());
// Most address ranges are contiguous from function to function
// so our new ranges will likely be smaller. We calculate the size
@@ -151,7 +102,7 @@ void DWARFDebugAranges::sort(bool minimize, uint32_t n) {
// copy the new minimal stuff over to the new collection.
size_t minimal_size = 1;
for (size_t i = 1; i < orig_arange_size; ++i) {
- if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i], n))
+ if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i]))
++minimal_size;
}
@@ -166,14 +117,14 @@ void DWARFDebugAranges::sort(bool minimize, uint32_t n) {
uint32_t j = 0;
minimal_aranges[j] = Aranges[0];
for (size_t i = 1; i < orig_arange_size; ++i) {
- if(Range::SortedOverlapCheck (minimal_aranges[j], Aranges[i], n)) {
- minimal_aranges[j].setHiPC (Aranges[i].HiPC());
+ if (Range::SortedOverlapCheck(minimal_aranges[j], Aranges[i])) {
+ minimal_aranges[j].setHighPC(Aranges[i].HighPC());
} else {
// Only increment j if we aren't merging
minimal_aranges[++j] = Aranges[i];
}
}
- assert (j+1 == minimal_size);
+ assert(j+1 == minimal_size);
// Now swap our new minimal aranges into place. The local
// minimal_aranges will then contain the old big collection
@@ -181,50 +132,21 @@ void DWARFDebugAranges::sort(bool minimize, uint32_t n) {
minimal_aranges.swap(Aranges);
}
-uint32_t DWARFDebugAranges::findAddress(uint64_t address) const {
+uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const {
if (!Aranges.empty()) {
- Range range(address);
+ Range range(Address);
RangeCollIterator begin = Aranges.begin();
RangeCollIterator end = Aranges.end();
- RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan);
+ RangeCollIterator pos =
+ std::lower_bound(begin, end, range);
- if (pos != end && pos->LoPC <= address && address < pos->HiPC()) {
- return pos->Offset;
+ if (pos != end && pos->containsAddress(Address)) {
+ return pos->CUOffset;
} else if (pos != begin) {
--pos;
- if (pos->LoPC <= address && address < pos->HiPC())
- return (*pos).Offset;
+ if (pos->containsAddress(Address))
+ return pos->CUOffset;
}
}
return -1U;
}
-
-bool
-DWARFDebugAranges::allRangesAreContiguous(uint64_t &LoPC, uint64_t &HiPC) const{
- if (Aranges.empty())
- return false;
-
- uint64_t next_addr = 0;
- RangeCollIterator begin = Aranges.begin();
- for (RangeCollIterator pos = begin, end = Aranges.end(); pos != end;
- ++pos) {
- if (pos != begin && pos->LoPC != next_addr)
- return false;
- next_addr = pos->HiPC();
- }
- // We checked for empty at the start of function so front() will be valid.
- LoPC = Aranges.front().LoPC;
- // We checked for empty at the start of function so back() will be valid.
- HiPC = Aranges.back().HiPC();
- return true;
-}
-
-bool DWARFDebugAranges::getMaxRange(uint64_t &LoPC, uint64_t &HiPC) const {
- if (Aranges.empty())
- return false;
- // We checked for empty at the start of function so front() will be valid.
- LoPC = Aranges.front().LoPC;
- // We checked for empty at the start of function so back() will be valid.
- HiPC = Aranges.back().HiPC();
- return true;
-}
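The net effect of the DWARFDebugAranges.cpp rewrite is a two-step public flow: generate() clears state, ingests whatever .debug_aranges describes, builds ranges from CU DIEs for any compile units the section missed, then sorts and merges; findAddress() binary-searches the result. A minimal caller sketch, assuming Ctx is a valid DWARFContext pointer obtained elsewhere:

    #include "DWARFDebugAranges.h"
    using namespace llvm;

    static uint32_t cuOffsetForAddress(DWARFContext *Ctx, uint64_t PC) {
      DWARFDebugAranges Aranges;
      Aranges.generate(Ctx);           // .debug_aranges plus CU-derived ranges
      return Aranges.findAddress(PC);  // -1U when no range covers PC
    }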
diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h
index 1509ffa..35ad8e5 100644
--- a/lib/DebugInfo/DWARFDebugAranges.h
+++ b/lib/DebugInfo/DWARFDebugAranges.h
@@ -20,81 +20,61 @@ class DWARFContext;
class DWARFDebugAranges {
public:
+ void clear() {
+ Aranges.clear();
+ ParsedCUOffsets.clear();
+ }
+
+ void generate(DWARFContext *CTX);
+
+ // Use appendRange multiple times and then call sortAndMinimize.
+ void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC);
+
+ uint32_t findAddress(uint64_t Address) const;
+
+private:
+ void extract(DataExtractor DebugArangesData);
+ void sortAndMinimize();
+
struct Range {
- explicit Range(uint64_t lo = -1ULL, uint64_t hi = -1ULL,
- uint32_t off = -1U)
- : LoPC(lo), Length(hi-lo), Offset(off) {}
-
- void clear() {
- LoPC = -1ULL;
- Length = 0;
- Offset = -1U;
- }
+ explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL,
+ uint32_t CUOffset = -1U)
+ : LowPC(LowPC), Length(HighPC - LowPC), CUOffset(CUOffset) {}
- void setHiPC(uint64_t HiPC) {
- if (HiPC == -1ULL || HiPC <= LoPC)
+ void setHighPC(uint64_t HighPC) {
+ if (HighPC == -1ULL || HighPC <= LowPC)
Length = 0;
else
- Length = HiPC - LoPC;
+ Length = HighPC - LowPC;
}
- uint64_t HiPC() const {
+ uint64_t HighPC() const {
if (Length)
- return LoPC + Length;
+ return LowPC + Length;
return -1ULL;
}
- bool isValidRange() const { return Length > 0; }
+ bool containsAddress(uint64_t Address) const {
+ return LowPC <= Address && Address < HighPC();
+ }
- static bool SortedOverlapCheck(const Range &curr_range,
- const Range &next_range, uint32_t n) {
- if (curr_range.Offset != next_range.Offset)
- return false;
- return curr_range.HiPC() + n >= next_range.LoPC;
+ bool operator <(const Range &other) const {
+ return LowPC < other.LowPC;
}
- bool contains(const Range &range) const {
- return LoPC <= range.LoPC && range.HiPC() <= HiPC();
+ static bool SortedOverlapCheck(const Range &Left, const Range &Right) {
+ if (Left.CUOffset != Right.CUOffset)
+ return false;
+ return Left.HighPC() >= Right.LowPC;
}
- void dump(raw_ostream &OS) const;
- uint64_t LoPC; // Start of address range
- uint32_t Length; // End of address range (not including this address)
- uint32_t Offset; // Offset of the compile unit or die
+ uint64_t LowPC; // Start of address range.
+ uint32_t Length; // Length of the address range (the end address is not included).
+ uint32_t CUOffset; // Offset of the compile unit or die.
};
- void clear() {
- Aranges.clear();
- ParsedCUOffsets.clear();
- }
- bool allRangesAreContiguous(uint64_t& LoPC, uint64_t& HiPC) const;
- bool getMaxRange(uint64_t& LoPC, uint64_t& HiPC) const;
- bool extract(DataExtractor debug_aranges_data);
- bool generate(DWARFContext *ctx);
-
- // Use append range multiple times and then call sort
- void appendRange(uint32_t cu_offset, uint64_t low_pc, uint64_t high_pc);
- void sort(bool minimize, uint32_t n);
-
- const Range *rangeAtIndex(uint32_t idx) const {
- if (idx < Aranges.size())
- return &Aranges[idx];
- return NULL;
- }
- void dump(raw_ostream &OS) const;
- uint32_t findAddress(uint64_t address) const;
- bool isEmpty() const { return Aranges.empty(); }
- uint32_t getNumRanges() const { return Aranges.size(); }
-
- uint32_t offsetAtIndex(uint32_t idx) const {
- if (idx < Aranges.size())
- return Aranges[idx].Offset;
- return -1U;
- }
-
typedef std::vector<Range> RangeColl;
typedef RangeColl::const_iterator RangeCollIterator;
typedef DenseSet<uint32_t> ParsedCUOffsetColl;
-private:
RangeColl Aranges;
ParsedCUOffsetColl ParsedCUOffsets;
};
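The reworked Range type (now private to DWARFDebugAranges) models a half-open interval: Length stores HighPC - LowPC, containsAddress() tests LowPC <= Addr < HighPC(), and SortedOverlapCheck() only merges neighbours that come from the same CU and touch or overlap. A standalone sketch mirroring that arithmetic (not the private struct itself), for illustration only:

    #include <cstdint>

    struct Interval {
      uint64_t LowPC;
      uint32_t Length;                       // HighPC - LowPC
      uint64_t highPC() const { return LowPC + Length; }
      bool contains(uint64_t A) const { return LowPC <= A && A < highPC(); }
    };

    // Two sorted intervals from the same CU merge when Left.highPC() >= Right.LowPC,
    // e.g. [0x1000, 0x1040) and [0x1040, 0x1080) collapse into [0x1000, 0x1080).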
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index 0c7b7e3..babfd2e 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -19,11 +19,10 @@
using namespace llvm;
using namespace dwarf;
-void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS,
- const DWARFCompileUnit *cu,
+void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, const DWARFUnit *u,
unsigned recurseDepth,
unsigned indent) const {
- DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+ DataExtractor debug_info_data = u->getDebugInfoExtractor();
uint32_t offset = Offset;
if (debug_info_data.isValidOffset(offset)) {
@@ -45,13 +44,13 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS,
for (uint32_t i = 0; i != numAttributes; ++i) {
uint16_t attr = AbbrevDecl->getAttrByIndex(i);
uint16_t form = AbbrevDecl->getFormByIndex(i);
- dumpAttribute(OS, cu, &offset, attr, form, indent);
+ dumpAttribute(OS, u, &offset, attr, form, indent);
}
const DWARFDebugInfoEntryMinimal *child = getFirstChild();
if (recurseDepth > 0 && child) {
while (child) {
- child->dump(OS, cu, recurseDepth-1, indent+2);
+ child->dump(OS, u, recurseDepth-1, indent+2);
child = child->getSibling();
}
}
@@ -66,12 +65,11 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS,
}
void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
- const DWARFCompileUnit *cu,
- uint32_t* offset_ptr,
- uint16_t attr,
- uint16_t form,
+ const DWARFUnit *u,
+ uint32_t *offset_ptr,
+ uint16_t attr, uint16_t form,
unsigned indent) const {
- OS << format("0x%8.8x: ", *offset_ptr);
+ OS << " ";
OS.indent(indent+2);
const char *attrString = AttributeString(attr);
if (attrString)
@@ -86,57 +84,20 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
DWARFFormValue formValue(form);
- if (!formValue.extractValue(cu->getDebugInfoExtractor(), offset_ptr, cu))
+ if (!formValue.extractValue(u->getDebugInfoExtractor(), offset_ptr, u))
return;
OS << "\t(";
- formValue.dump(OS, cu);
+ formValue.dump(OS, u);
OS << ")\n";
}
-bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *CU,
- const uint8_t *FixedFormSizes,
+bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U,
uint32_t *OffsetPtr) {
Offset = *OffsetPtr;
- DataExtractor DebugInfoData = CU->getDebugInfoExtractor();
- uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
- if (0 == AbbrCode) {
- // NULL debug tag entry.
- AbbrevDecl = NULL;
- return true;
- }
- AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
- assert(AbbrevDecl);
- assert(FixedFormSizes); // For best performance this should be specified!
-
- // Skip all data in the .debug_info for the attributes
- for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) {
- uint16_t Form = AbbrevDecl->getFormByIndex(i);
-
- // FIXME: Currently we're checking if this is less than the last
- // entry in the fixed_form_sizes table, but this should be changed
- // to use dynamic dispatch.
- uint8_t FixedFormSize =
- (Form < DW_FORM_ref_sig8) ? FixedFormSizes[Form] : 0;
- if (FixedFormSize)
- *OffsetPtr += FixedFormSize;
- else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr,
- CU)) {
- // Restore the original offset.
- *OffsetPtr = Offset;
- return false;
- }
- }
- return true;
-}
-
-bool
-DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU,
- uint32_t *OffsetPtr) {
- DataExtractor DebugInfoData = CU->getDebugInfoExtractor();
- const uint32_t CUEndOffset = CU->getNextCompileUnitOffset();
- Offset = *OffsetPtr;
- if ((Offset >= CUEndOffset) || !DebugInfoData.isValidOffset(Offset))
+ DataExtractor DebugInfoData = U->getDebugInfoExtractor();
+ uint32_t UEndOffset = U->getNextUnitOffset();
+ if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
return false;
uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
if (0 == AbbrCode) {
@@ -144,31 +105,25 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU,
AbbrevDecl = NULL;
return true;
}
- AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+ AbbrevDecl = U->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
if (0 == AbbrevDecl) {
// Restore the original offset.
*OffsetPtr = Offset;
return false;
}
- bool IsCompileUnitTag = (AbbrevDecl->getTag() == DW_TAG_compile_unit);
- if (IsCompileUnitTag)
- const_cast<DWARFCompileUnit*>(CU)->setBaseAddress(0);
+ ArrayRef<uint8_t> FixedFormSizes = DWARFFormValue::getFixedFormSizes(
+ U->getAddressByteSize(), U->getVersion());
+ assert(FixedFormSizes.size() > 0);
// Skip all data in the .debug_info for the attributes
for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) {
- uint16_t Attr = AbbrevDecl->getAttrByIndex(i);
uint16_t Form = AbbrevDecl->getFormByIndex(i);
- if (IsCompileUnitTag &&
- ((Attr == DW_AT_entry_pc) || (Attr == DW_AT_low_pc))) {
- DWARFFormValue FormValue(Form);
- if (FormValue.extractValue(DebugInfoData, OffsetPtr, CU)) {
- if (Attr == DW_AT_low_pc || Attr == DW_AT_entry_pc)
- const_cast<DWARFCompileUnit*>(CU)
- ->setBaseAddress(FormValue.getUnsigned());
- }
- } else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr,
- CU)) {
+ uint8_t FixedFormSize =
+ (Form < FixedFormSizes.size()) ? FixedFormSizes[Form] : 0;
+ if (FixedFormSize)
+ *OffsetPtr += FixedFormSize;
+ else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U)) {
// Restore the original offset.
*OffsetPtr = Offset;
return false;
@@ -187,190 +142,179 @@ bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const {
Tag == DW_TAG_inlined_subroutine;
}
-uint32_t
-DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu,
- const uint16_t attr,
- DWARFFormValue &form_value,
- uint32_t *end_attr_offset_ptr)
- const {
- if (AbbrevDecl) {
- uint32_t attr_idx = AbbrevDecl->findAttributeIndex(attr);
-
- if (attr_idx != -1U) {
- uint32_t offset = getOffset();
+bool DWARFDebugInfoEntryMinimal::getAttributeValue(
+ const DWARFUnit *U, const uint16_t Attr, DWARFFormValue &FormValue) const {
+ if (!AbbrevDecl)
+ return false;
- DataExtractor debug_info_data = cu->getDebugInfoExtractor();
+ uint32_t AttrIdx = AbbrevDecl->findAttributeIndex(Attr);
+ if (AttrIdx == -1U)
+ return false;
- // Skip the abbreviation code so we are at the data for the attributes
- debug_info_data.getULEB128(&offset);
+ DataExtractor DebugInfoData = U->getDebugInfoExtractor();
+ uint32_t DebugInfoOffset = getOffset();
- uint32_t idx = 0;
- while (idx < attr_idx)
- DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(idx++),
- debug_info_data, &offset, cu);
+ // Skip the abbreviation code so we are at the data for the attributes
+ DebugInfoData.getULEB128(&DebugInfoOffset);
- const uint32_t attr_offset = offset;
- form_value = DWARFFormValue(AbbrevDecl->getFormByIndex(idx));
- if (form_value.extractValue(debug_info_data, &offset, cu)) {
- if (end_attr_offset_ptr)
- *end_attr_offset_ptr = offset;
- return attr_offset;
- }
- }
+ // Skip preceding attribute values.
+ for (uint32_t i = 0; i < AttrIdx; ++i) {
+ DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(i),
+ DebugInfoData, &DebugInfoOffset, U);
}
- return 0;
+ FormValue = DWARFFormValue(AbbrevDecl->getFormByIndex(AttrIdx));
+ return FormValue.extractValue(DebugInfoData, &DebugInfoOffset, U);
}
-const char*
-DWARFDebugInfoEntryMinimal::getAttributeValueAsString(
- const DWARFCompileUnit* cu,
- const uint16_t attr,
- const char* fail_value)
- const {
- DWARFFormValue form_value;
- if (getAttributeValue(cu, attr, form_value)) {
- DataExtractor stringExtractor(cu->getStringSection(), false, 0);
- return form_value.getAsCString(&stringExtractor);
- }
- return fail_value;
+const char *DWARFDebugInfoEntryMinimal::getAttributeValueAsString(
+ const DWARFUnit *U, const uint16_t Attr, const char *FailValue) const {
+ DWARFFormValue FormValue;
+ if (!getAttributeValue(U, Attr, FormValue))
+ return FailValue;
+ Optional<const char *> Result = FormValue.getAsCString(U);
+ return Result.hasValue() ? Result.getValue() : FailValue;
+}
+
+uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsAddress(
+ const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const {
+ DWARFFormValue FormValue;
+ if (!getAttributeValue(U, Attr, FormValue))
+ return FailValue;
+ Optional<uint64_t> Result = FormValue.getAsAddress(U);
+ return Result.hasValue() ? Result.getValue() : FailValue;
}
-uint64_t
-DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned(
- const DWARFCompileUnit* cu,
- const uint16_t attr,
- uint64_t fail_value) const {
- DWARFFormValue form_value;
- if (getAttributeValue(cu, attr, form_value))
- return form_value.getUnsigned();
- return fail_value;
+uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsignedConstant(
+ const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const {
+ DWARFFormValue FormValue;
+ if (!getAttributeValue(U, Attr, FormValue))
+ return FailValue;
+ Optional<uint64_t> Result = FormValue.getAsUnsignedConstant();
+ return Result.hasValue() ? Result.getValue() : FailValue;
}
-int64_t
-DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned(
- const DWARFCompileUnit* cu,
- const uint16_t attr,
- int64_t fail_value) const {
- DWARFFormValue form_value;
- if (getAttributeValue(cu, attr, form_value))
- return form_value.getSigned();
- return fail_value;
+uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsReference(
+ const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const {
+ DWARFFormValue FormValue;
+ if (!getAttributeValue(U, Attr, FormValue))
+ return FailValue;
+ Optional<uint64_t> Result = FormValue.getAsReference(U);
+ return Result.hasValue() ? Result.getValue() : FailValue;
}
-uint64_t
-DWARFDebugInfoEntryMinimal::getAttributeValueAsReference(
- const DWARFCompileUnit* cu,
- const uint16_t attr,
- uint64_t fail_value)
- const {
- DWARFFormValue form_value;
- if (getAttributeValue(cu, attr, form_value))
- return form_value.getReference(cu);
- return fail_value;
+uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsSectionOffset(
+ const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const {
+ DWARFFormValue FormValue;
+ if (!getAttributeValue(U, Attr, FormValue))
+ return FailValue;
+ Optional<uint64_t> Result = FormValue.getAsSectionOffset();
+ return Result.hasValue() ? Result.getValue() : FailValue;
}
-bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFCompileUnit *CU,
+bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFUnit *U,
uint64_t &LowPC,
uint64_t &HighPC) const {
- HighPC = -1ULL;
- LowPC = getAttributeValueAsUnsigned(CU, DW_AT_low_pc, -1ULL);
- if (LowPC != -1ULL)
- HighPC = getAttributeValueAsUnsigned(CU, DW_AT_high_pc, -1ULL);
+ LowPC = getAttributeValueAsAddress(U, DW_AT_low_pc, -1ULL);
+ if (LowPC == -1ULL)
+ return false;
+ HighPC = getAttributeValueAsAddress(U, DW_AT_high_pc, -1ULL);
+ if (HighPC == -1ULL) {
+ // Since DWARF4, DW_AT_high_pc may also be of class constant, in which case
+ // it represents function size.
+ HighPC = getAttributeValueAsUnsignedConstant(U, DW_AT_high_pc, -1ULL);
+ if (HighPC != -1ULL)
+ HighPC += LowPC;
+ }
return (HighPC != -1ULL);
}
-void
-DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *CU,
- DWARFDebugAranges *DebugAranges)
- const {
+void DWARFDebugInfoEntryMinimal::buildAddressRangeTable(
+ const DWARFUnit *U, DWARFDebugAranges *DebugAranges,
+ uint32_t UOffsetInAranges) const {
if (AbbrevDecl) {
if (isSubprogramDIE()) {
uint64_t LowPC, HighPC;
- if (getLowAndHighPC(CU, LowPC, HighPC)) {
- DebugAranges->appendRange(CU->getOffset(), LowPC, HighPC);
- }
+ if (getLowAndHighPC(U, LowPC, HighPC))
+ DebugAranges->appendRange(UOffsetInAranges, LowPC, HighPC);
// FIXME: try to append ranges from .debug_ranges section.
}
- const DWARFDebugInfoEntryMinimal *child = getFirstChild();
- while (child) {
- child->buildAddressRangeTable(CU, DebugAranges);
- child = child->getSibling();
+ const DWARFDebugInfoEntryMinimal *Child = getFirstChild();
+ while (Child) {
+ Child->buildAddressRangeTable(U, DebugAranges, UOffsetInAranges);
+ Child = Child->getSibling();
}
}
}
-bool
-DWARFDebugInfoEntryMinimal::addressRangeContainsAddress(
- const DWARFCompileUnit *CU,
- const uint64_t Address)
- const {
+bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress(
+ const DWARFUnit *U, const uint64_t Address) const {
if (isNULL())
return false;
uint64_t LowPC, HighPC;
- if (getLowAndHighPC(CU, LowPC, HighPC))
+ if (getLowAndHighPC(U, LowPC, HighPC))
return (LowPC <= Address && Address <= HighPC);
// Try to get address ranges from .debug_ranges section.
- uint32_t RangesOffset = getAttributeValueAsReference(CU, DW_AT_ranges, -1U);
+ uint32_t RangesOffset =
+ getAttributeValueAsSectionOffset(U, DW_AT_ranges, -1U);
if (RangesOffset != -1U) {
DWARFDebugRangeList RangeList;
- if (CU->extractRangeList(RangesOffset, RangeList))
- return RangeList.containsAddress(CU->getBaseAddress(), Address);
+ if (U->extractRangeList(RangesOffset, RangeList))
+ return RangeList.containsAddress(U->getBaseAddress(), Address);
}
return false;
}
-const char*
-DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFCompileUnit *CU)
- const {
+const char *
+DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U) const {
if (!isSubroutineDIE())
return 0;
// Try to get mangled name if possible.
if (const char *name =
- getAttributeValueAsString(CU, DW_AT_MIPS_linkage_name, 0))
+ getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, 0))
return name;
- if (const char *name = getAttributeValueAsString(CU, DW_AT_linkage_name, 0))
+ if (const char *name = getAttributeValueAsString(U, DW_AT_linkage_name, 0))
return name;
- if (const char *name = getAttributeValueAsString(CU, DW_AT_name, 0))
+ if (const char *name = getAttributeValueAsString(U, DW_AT_name, 0))
return name;
// Try to get name from specification DIE.
uint32_t spec_ref =
- getAttributeValueAsReference(CU, DW_AT_specification, -1U);
+ getAttributeValueAsReference(U, DW_AT_specification, -1U);
if (spec_ref != -1U) {
DWARFDebugInfoEntryMinimal spec_die;
- if (spec_die.extract(CU, &spec_ref)) {
- if (const char *name = spec_die.getSubroutineName(CU))
+ if (spec_die.extractFast(U, &spec_ref)) {
+ if (const char *name = spec_die.getSubroutineName(U))
return name;
}
}
// Try to get name from abstract origin DIE.
uint32_t abs_origin_ref =
- getAttributeValueAsReference(CU, DW_AT_abstract_origin, -1U);
+ getAttributeValueAsReference(U, DW_AT_abstract_origin, -1U);
if (abs_origin_ref != -1U) {
DWARFDebugInfoEntryMinimal abs_origin_die;
- if (abs_origin_die.extract(CU, &abs_origin_ref)) {
- if (const char *name = abs_origin_die.getSubroutineName(CU))
+ if (abs_origin_die.extractFast(U, &abs_origin_ref)) {
+ if (const char *name = abs_origin_die.getSubroutineName(U))
return name;
}
}
return 0;
}
-void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFCompileUnit *CU,
+void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFUnit *U,
uint32_t &CallFile,
uint32_t &CallLine,
uint32_t &CallColumn) const {
- CallFile = getAttributeValueAsUnsigned(CU, DW_AT_call_file, 0);
- CallLine = getAttributeValueAsUnsigned(CU, DW_AT_call_line, 0);
- CallColumn = getAttributeValueAsUnsigned(CU, DW_AT_call_column, 0);
+ CallFile = getAttributeValueAsUnsignedConstant(U, DW_AT_call_file, 0);
+ CallLine = getAttributeValueAsUnsignedConstant(U, DW_AT_call_line, 0);
+ CallColumn = getAttributeValueAsUnsignedConstant(U, DW_AT_call_column, 0);
}
DWARFDebugInfoEntryInlinedChain
DWARFDebugInfoEntryMinimal::getInlinedChainForAddress(
- const DWARFCompileUnit *CU, const uint64_t Address) const {
+ const DWARFUnit *U, const uint64_t Address) const {
DWARFDebugInfoEntryInlinedChain InlinedChain;
- InlinedChain.CU = CU;
+ InlinedChain.U = U;
if (isNULL())
return InlinedChain;
for (const DWARFDebugInfoEntryMinimal *DIE = this; DIE; ) {
@@ -382,7 +326,7 @@ DWARFDebugInfoEntryMinimal::getInlinedChainForAddress(
// Try to get child which also contains provided address.
const DWARFDebugInfoEntryMinimal *Child = DIE->getFirstChild();
while (Child) {
- if (Child->addressRangeContainsAddress(CU, Address)) {
+ if (Child->addressRangeContainsAddress(U, Address)) {
// Assume there is only one such child.
break;
}
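One behavioural change in DWARFDebugInfoEntry.cpp deserves a note: since DWARF 4, DW_AT_high_pc may be encoded either as an address or as a constant offset from DW_AT_low_pc, and the rewritten getLowAndHighPC() tries the address form first, then falls back to the constant form and adds LowPC to it. A usage sketch, assuming DIE and U refer to an already-extracted DIE and its unit:

    uint64_t LowPC, HighPC;
    if (DIE->getLowAndHighPC(U, LowPC, HighPC)) {
      // HighPC is always an absolute address here; if the producer emitted
      // DW_AT_high_pc as a constant, LowPC was already added to it above.
    }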
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
index a69911f..aa61056 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -18,6 +18,7 @@ namespace llvm {
class DWARFDebugAranges;
class DWARFCompileUnit;
+class DWARFUnit;
class DWARFContext;
class DWARFFormValue;
struct DWARFDebugInfoEntryInlinedChain;
@@ -39,23 +40,15 @@ public:
DWARFDebugInfoEntryMinimal()
: Offset(0), ParentIdx(0), SiblingIdx(0), AbbrevDecl(0) {}
- void dump(raw_ostream &OS, const DWARFCompileUnit *cu,
- unsigned recurseDepth, unsigned indent = 0) const;
- void dumpAttribute(raw_ostream &OS, const DWARFCompileUnit *cu,
- uint32_t *offset_ptr, uint16_t attr, uint16_t form,
- unsigned indent = 0) const;
+ void dump(raw_ostream &OS, const DWARFUnit *u, unsigned recurseDepth,
+ unsigned indent = 0) const;
+ void dumpAttribute(raw_ostream &OS, const DWARFUnit *u, uint32_t *offset_ptr,
+ uint16_t attr, uint16_t form, unsigned indent = 0) const;
- /// Extracts a debug info entry, which is a child of a given compile unit,
+ /// Extracts a debug info entry, which is a child of a given unit,
/// starting at a given offset. If DIE can't be extracted, returns false and
/// doesn't change OffsetPtr.
- bool extractFast(const DWARFCompileUnit *CU, const uint8_t *FixedFormSizes,
- uint32_t *OffsetPtr);
-
- /// Extract a debug info entry for a given compile unit from the
- /// .debug_info and .debug_abbrev data starting at the given offset.
- /// If compile unit can't be parsed, returns false and doesn't change
- /// OffsetPtr.
- bool extract(const DWARFCompileUnit *CU, uint32_t *OffsetPtr);
+ bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr);
uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
bool isNULL() const { return AbbrevDecl == 0; }
@@ -120,54 +113,54 @@ public:
return AbbrevDecl;
}
- uint32_t getAttributeValue(const DWARFCompileUnit *cu,
- const uint16_t attr, DWARFFormValue &formValue,
- uint32_t *end_attr_offset_ptr = 0) const;
+ bool getAttributeValue(const DWARFUnit *U, const uint16_t Attr,
+ DWARFFormValue &FormValue) const;
+
+ const char *getAttributeValueAsString(const DWARFUnit *U, const uint16_t Attr,
+ const char *FailValue) const;
- const char* getAttributeValueAsString(const DWARFCompileUnit* cu,
- const uint16_t attr,
- const char *fail_value) const;
+ uint64_t getAttributeValueAsAddress(const DWARFUnit *U, const uint16_t Attr,
+ uint64_t FailValue) const;
- uint64_t getAttributeValueAsUnsigned(const DWARFCompileUnit *cu,
- const uint16_t attr,
- uint64_t fail_value) const;
+ uint64_t getAttributeValueAsUnsignedConstant(const DWARFUnit *U,
+ const uint16_t Attr,
+ uint64_t FailValue) const;
- uint64_t getAttributeValueAsReference(const DWARFCompileUnit *cu,
- const uint16_t attr,
- uint64_t fail_value) const;
+ uint64_t getAttributeValueAsReference(const DWARFUnit *U, const uint16_t Attr,
+ uint64_t FailValue) const;
- int64_t getAttributeValueAsSigned(const DWARFCompileUnit* cu,
- const uint16_t attr,
- int64_t fail_value) const;
+ uint64_t getAttributeValueAsSectionOffset(const DWARFUnit *U,
+ const uint16_t Attr,
+ uint64_t FailValue) const;
/// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU.
/// Returns true if both attributes are present.
- bool getLowAndHighPC(const DWARFCompileUnit *CU,
- uint64_t &LowPC, uint64_t &HighPC) const;
+ bool getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC,
+ uint64_t &HighPC) const;
- void buildAddressRangeTable(const DWARFCompileUnit *CU,
- DWARFDebugAranges *DebugAranges) const;
+ void buildAddressRangeTable(const DWARFUnit *U,
+ DWARFDebugAranges *DebugAranges,
+ uint32_t CUOffsetInAranges) const;
- bool addressRangeContainsAddress(const DWARFCompileUnit *CU,
+ bool addressRangeContainsAddress(const DWARFUnit *U,
const uint64_t Address) const;
/// If a DIE represents a subprogram (or inlined subroutine),
/// returns its mangled name (or short name, if mangled is missing).
/// This name may be fetched from specification or abstract origin
/// for this subprogram. Returns null if no name is found.
- const char* getSubroutineName(const DWARFCompileUnit *CU) const;
+ const char *getSubroutineName(const DWARFUnit *U) const;
/// Retrieves values of DW_AT_call_file, DW_AT_call_line and
/// DW_AT_call_column from DIE (or zeroes if they are missing).
- void getCallerFrame(const DWARFCompileUnit *CU, uint32_t &CallFile,
+ void getCallerFrame(const DWARFUnit *U, uint32_t &CallFile,
uint32_t &CallLine, uint32_t &CallColumn) const;
/// Get inlined chain for a given address, rooted at the current DIE.
/// Returns empty chain if address is not contained in address range
/// of current DIE.
DWARFDebugInfoEntryInlinedChain
- getInlinedChainForAddress(const DWARFCompileUnit *CU,
- const uint64_t Address) const;
+ getInlinedChainForAddress(const DWARFUnit *U, const uint64_t Address) const;
};
/// DWARFDebugInfoEntryInlinedChain - represents a chain of inlined_subroutine
@@ -176,9 +169,9 @@ public:
/// (except the last DIE) in this chain is contained in address
/// range for next DIE in the chain.
struct DWARFDebugInfoEntryInlinedChain {
- DWARFDebugInfoEntryInlinedChain() : CU(0) {}
+ DWARFDebugInfoEntryInlinedChain() : U(0) {}
SmallVector<DWARFDebugInfoEntryMinimal, 4> DIEs;
- const DWARFCompileUnit *CU;
+ const DWARFUnit *U;
};
}
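On the header side, the old getAttributeValueAsUnsigned/getAttributeValueAsSigned pair gives way to form-class-aware accessors, each taking an explicit fail value that is returned when the attribute is absent or has an unexpected form class. A sketch of typical call sites, assuming Die and U are valid; DW_AT_decl_line is just an illustrative attribute:

    uint64_t Low  = Die.getAttributeValueAsAddress(U, dwarf::DW_AT_low_pc, -1ULL);
    uint64_t Line = Die.getAttributeValueAsUnsignedConstant(U, dwarf::DW_AT_decl_line, 0);
    uint64_t Spec = Die.getAttributeValueAsReference(U, dwarf::DW_AT_specification, -1ULL);
    uint64_t Rng  = Die.getAttributeValueAsSectionOffset(U, dwarf::DW_AT_ranges, -1ULL);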
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index 192381c..13d09dd 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -211,7 +211,7 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data,
if (*offset_ptr != end_prologue_offset) {
fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
- " have ended at 0x%8.8x but it ended ad 0x%8.8x\n",
+ " have ended at 0x%8.8x but it ended at 0x%8.8x\n",
prologue_offset, end_prologue_offset, *offset_ptr);
return false;
}
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index 1a1cf24..da71fb3 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -10,6 +10,8 @@
#include "llvm/DebugInfo/DWARFFormValue.h"
#include "DWARFCompileUnit.h"
#include "DWARFContext.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
@@ -19,64 +21,114 @@ using namespace llvm;
using namespace dwarf;
namespace {
-template <uint8_t AddrSize, uint8_t RefAddrSize> struct FixedFormSizes {
- static const uint8_t sizes[];
-};
+uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) {
+ // FIXME: Support DWARF64.
+ return (Version == 2) ? AddrSize : 4;
}
template <uint8_t AddrSize, uint8_t RefAddrSize>
-const uint8_t FixedFormSizes<AddrSize, RefAddrSize>::sizes[] = {
- 0, // 0x00 unused
- AddrSize, // 0x01 DW_FORM_addr
- 0, // 0x02 unused
- 0, // 0x03 DW_FORM_block2
- 0, // 0x04 DW_FORM_block4
- 2, // 0x05 DW_FORM_data2
- 4, // 0x06 DW_FORM_data4
- 8, // 0x07 DW_FORM_data8
- 0, // 0x08 DW_FORM_string
- 0, // 0x09 DW_FORM_block
- 0, // 0x0a DW_FORM_block1
- 1, // 0x0b DW_FORM_data1
- 1, // 0x0c DW_FORM_flag
- 0, // 0x0d DW_FORM_sdata
- 4, // 0x0e DW_FORM_strp
- 0, // 0x0f DW_FORM_udata
- RefAddrSize, // 0x10 DW_FORM_ref_addr
- 1, // 0x11 DW_FORM_ref1
- 2, // 0x12 DW_FORM_ref2
- 4, // 0x13 DW_FORM_ref4
- 8, // 0x14 DW_FORM_ref8
- 0, // 0x15 DW_FORM_ref_udata
- 0, // 0x16 DW_FORM_indirect
- 4, // 0x17 DW_FORM_sec_offset
- 0, // 0x18 DW_FORM_exprloc
- 0, // 0x19 DW_FORM_flag_present
- 8, // 0x20 DW_FORM_ref_sig8
-};
-
-static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) {
- // FIXME: Support DWARF64.
- return (Version == 2) ? AddrSize : 4;
+ArrayRef<uint8_t> makeFixedFormSizesArrayRef() {
+ static const uint8_t sizes[] = {
+ 0, // 0x00 unused
+ AddrSize, // 0x01 DW_FORM_addr
+ 0, // 0x02 unused
+ 0, // 0x03 DW_FORM_block2
+ 0, // 0x04 DW_FORM_block4
+ 2, // 0x05 DW_FORM_data2
+ 4, // 0x06 DW_FORM_data4
+ 8, // 0x07 DW_FORM_data8
+ 0, // 0x08 DW_FORM_string
+ 0, // 0x09 DW_FORM_block
+ 0, // 0x0a DW_FORM_block1
+ 1, // 0x0b DW_FORM_data1
+ 1, // 0x0c DW_FORM_flag
+ 0, // 0x0d DW_FORM_sdata
+ 4, // 0x0e DW_FORM_strp
+ 0, // 0x0f DW_FORM_udata
+ RefAddrSize, // 0x10 DW_FORM_ref_addr
+ 1, // 0x11 DW_FORM_ref1
+ 2, // 0x12 DW_FORM_ref2
+ 4, // 0x13 DW_FORM_ref4
+ 8, // 0x14 DW_FORM_ref8
+ 0, // 0x15 DW_FORM_ref_udata
+ 0, // 0x16 DW_FORM_indirect
+ 4, // 0x17 DW_FORM_sec_offset
+ 0, // 0x18 DW_FORM_exprloc
+ 0, // 0x19 DW_FORM_flag_present
+ };
+ return makeArrayRef(sizes);
+}
}
-const uint8_t *
-DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, uint16_t Version) {
+ArrayRef<uint8_t> DWARFFormValue::getFixedFormSizes(uint8_t AddrSize,
+ uint16_t Version) {
uint8_t RefAddrSize = getRefAddrSize(AddrSize, Version);
if (AddrSize == 4 && RefAddrSize == 4)
- return FixedFormSizes<4, 4>::sizes;
+ return makeFixedFormSizesArrayRef<4, 4>();
if (AddrSize == 4 && RefAddrSize == 8)
- return FixedFormSizes<4, 8>::sizes;
+ return makeFixedFormSizesArrayRef<4, 8>();
if (AddrSize == 8 && RefAddrSize == 4)
- return FixedFormSizes<8, 4>::sizes;
+ return makeFixedFormSizesArrayRef<8, 4>();
if (AddrSize == 8 && RefAddrSize == 8)
- return FixedFormSizes<8, 8>::sizes;
- return 0;
+ return makeFixedFormSizesArrayRef<8, 8>();
+ return None;
}
-bool
-DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
- const DWARFCompileUnit *cu) {
+static const DWARFFormValue::FormClass DWARF4FormClasses[] = {
+ DWARFFormValue::FC_Unknown, // 0x0
+ DWARFFormValue::FC_Address, // 0x01 DW_FORM_addr
+ DWARFFormValue::FC_Unknown, // 0x02 unused
+ DWARFFormValue::FC_Block, // 0x03 DW_FORM_block2
+ DWARFFormValue::FC_Block, // 0x04 DW_FORM_block4
+ DWARFFormValue::FC_Constant, // 0x05 DW_FORM_data2
+ // --- These can be FC_SectionOffset in DWARF3 and below:
+ DWARFFormValue::FC_Constant, // 0x06 DW_FORM_data4
+ DWARFFormValue::FC_Constant, // 0x07 DW_FORM_data8
+ // ---
+ DWARFFormValue::FC_String, // 0x08 DW_FORM_string
+ DWARFFormValue::FC_Block, // 0x09 DW_FORM_block
+ DWARFFormValue::FC_Block, // 0x0a DW_FORM_block1
+ DWARFFormValue::FC_Constant, // 0x0b DW_FORM_data1
+ DWARFFormValue::FC_Flag, // 0x0c DW_FORM_flag
+ DWARFFormValue::FC_Constant, // 0x0d DW_FORM_sdata
+ DWARFFormValue::FC_String, // 0x0e DW_FORM_strp
+ DWARFFormValue::FC_Constant, // 0x0f DW_FORM_udata
+ DWARFFormValue::FC_Reference, // 0x10 DW_FORM_ref_addr
+ DWARFFormValue::FC_Reference, // 0x11 DW_FORM_ref1
+ DWARFFormValue::FC_Reference, // 0x12 DW_FORM_ref2
+ DWARFFormValue::FC_Reference, // 0x13 DW_FORM_ref4
+ DWARFFormValue::FC_Reference, // 0x14 DW_FORM_ref8
+ DWARFFormValue::FC_Reference, // 0x15 DW_FORM_ref_udata
+ DWARFFormValue::FC_Indirect, // 0x16 DW_FORM_indirect
+ DWARFFormValue::FC_SectionOffset, // 0x17 DW_FORM_sec_offset
+ DWARFFormValue::FC_Exprloc, // 0x18 DW_FORM_exprloc
+ DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present
+};
+
+bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
+ // First, check DWARF4 form classes.
+ if (Form < ArrayRef<FormClass>(DWARF4FormClasses).size() &&
+ DWARF4FormClasses[Form] == FC)
+ return true;
+ // Check DW_FORM_ref_sig8 from DWARF4.
+ if (Form == DW_FORM_ref_sig8)
+ return (FC == FC_Reference);
+ // Check for some DWARF5 forms.
+ if (Form == DW_FORM_GNU_addr_index)
+ return (FC == FC_Address);
+ if (Form == DW_FORM_GNU_str_index)
+ return (FC == FC_String);
+ // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset.
+ // Don't check for DWARF version here, as some producers may still do this
+ // by mistake.
+ if ((Form == DW_FORM_data4 || Form == DW_FORM_data8) &&
+ FC == FC_SectionOffset)
+ return true;
+ return false;
+}
+
+bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
+ const DWARFUnit *cu) {
bool indirect = false;
bool is_block = false;
Value.data = NULL;
@@ -156,10 +208,6 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
break;
case DW_FORM_string:
Value.cstr = data.getCStr(offset_ptr);
- // Set the string value to also be the data for inlined cstr form
- // values only so we can tell the differnence between DW_FORM_string
- // and DW_FORM_strp form values
- Value.data = (const uint8_t*)Value.cstr;
break;
case DW_FORM_indirect:
Form = data.getULEB128(offset_ptr);
@@ -183,8 +231,6 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
Value.uval = data.getU64(offset_ptr);
break;
case DW_FORM_GNU_addr_index:
- Value.uval = data.getULEB128(offset_ptr);
- break;
case DW_FORM_GNU_str_index:
Value.uval = data.getULEB128(offset_ptr);
break;
@@ -207,13 +253,13 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
bool
DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr,
- const DWARFCompileUnit *cu) const {
+ const DWARFUnit *cu) const {
return DWARFFormValue::skipValue(Form, debug_info_data, offset_ptr, cu);
}
bool
DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
- uint32_t *offset_ptr, const DWARFCompileUnit *cu) {
+ uint32_t *offset_ptr, const DWARFUnit *cu) {
bool indirect = false;
do {
switch (form) {
@@ -313,21 +359,20 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
}
void
-DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
+DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
DataExtractor debug_str_data(cu->getStringSection(), true, 0);
DataExtractor debug_str_offset_data(cu->getStringOffsetSection(), true, 0);
- uint64_t uvalue = getUnsigned();
+ uint64_t uvalue = Value.uval;
bool cu_relative_offset = false;
switch (Form) {
case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break;
case DW_FORM_GNU_addr_index: {
- StringRef AddrOffsetSec = cu->getAddrOffsetSection();
OS << format(" indexed (%8.8x) address = ", (uint32_t)uvalue);
- if (AddrOffsetSec.size() != 0) {
- DataExtractor DA(AddrOffsetSec, true, cu->getAddressByteSize());
- OS << format("0x%016" PRIx64, getIndirectAddress(&DA, cu));
- } else
+ uint64_t Address;
+ if (cu->getAddrOffsetSectionItem(uvalue, Address))
+ OS << format("0x%016" PRIx64, Address);
+ else
OS << "<no .debug_addr section>";
break;
}
@@ -340,7 +385,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break;
case DW_FORM_string:
OS << '"';
- OS.write_escaped(getAsCString(NULL));
+ OS.write_escaped(Value.cstr);
OS << '"';
break;
case DW_FORM_exprloc:
@@ -372,25 +417,24 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
}
break;
- case DW_FORM_sdata: OS << getSigned(); break;
- case DW_FORM_udata: OS << getUnsigned(); break;
+ case DW_FORM_sdata: OS << Value.sval; break;
+ case DW_FORM_udata: OS << Value.uval; break;
case DW_FORM_strp: {
OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue);
- const char* dbg_str = getAsCString(&debug_str_data);
- if (dbg_str) {
+ Optional<const char *> DbgStr = getAsCString(cu);
+ if (DbgStr.hasValue()) {
OS << '"';
- OS.write_escaped(dbg_str);
+ OS.write_escaped(DbgStr.getValue());
OS << '"';
}
break;
}
case DW_FORM_GNU_str_index: {
OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue);
- const char *dbg_str = getIndirectCString(&debug_str_data,
- &debug_str_offset_data);
- if (dbg_str) {
+ Optional<const char *> DbgStr = getAsCString(cu);
+ if (DbgStr.hasValue()) {
OS << '"';
- OS.write_escaped(dbg_str);
+ OS.write_escaped(DbgStr.getValue());
OS << '"';
}
break;
@@ -439,97 +483,67 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
OS << format(" => {0x%8.8" PRIx64 "}", uvalue + (cu ? cu->getOffset() : 0));
}
-const char*
-DWARFFormValue::getAsCString(const DataExtractor *debug_str_data_ptr) const {
- if (isInlinedCStr()) {
+Optional<const char *> DWARFFormValue::getAsCString(const DWARFUnit *U) const {
+ if (!isFormClass(FC_String))
+ return None;
+ if (Form == DW_FORM_string)
return Value.cstr;
- } else if (debug_str_data_ptr) {
- uint32_t offset = Value.uval;
- return debug_str_data_ptr->getCStr(&offset);
+ if (U == 0)
+ return None;
+ uint32_t Offset = Value.uval;
+ if (Form == DW_FORM_GNU_str_index) {
+ uint32_t StrOffset;
+ if (!U->getStringOffsetSectionItem(Offset, StrOffset))
+ return None;
+ Offset = StrOffset;
}
- return NULL;
-}
-
-const char*
-DWARFFormValue::getIndirectCString(const DataExtractor *DS,
- const DataExtractor *DSO) const {
- if (!DS || !DSO) return NULL;
-
- uint32_t offset = Value.uval * 4;
- uint32_t soffset = DSO->getU32(&offset);
- return DS->getCStr(&soffset);
-}
-
-uint64_t
-DWARFFormValue::getIndirectAddress(const DataExtractor *DA,
- const DWARFCompileUnit *cu) const {
- if (!DA) return 0;
-
- uint32_t offset = Value.uval * cu->getAddressByteSize();
- return DA->getAddress(&offset);
+ if (const char *Str = U->getStringExtractor().getCStr(&Offset)) {
+ return Str;
+ }
+ return None;
}
-uint64_t DWARFFormValue::getReference(const DWARFCompileUnit *cu) const {
- uint64_t die_offset = Value.uval;
- switch (Form) {
- case DW_FORM_ref1:
- case DW_FORM_ref2:
- case DW_FORM_ref4:
- case DW_FORM_ref8:
- case DW_FORM_ref_udata:
- die_offset += (cu ? cu->getOffset() : 0);
- break;
- default:
- break;
+Optional<uint64_t> DWARFFormValue::getAsAddress(const DWARFUnit *U) const {
+ if (!isFormClass(FC_Address))
+ return None;
+ if (Form == DW_FORM_GNU_addr_index) {
+ uint32_t Index = Value.uval;
+ uint64_t Result;
+ if (U == 0 || !U->getAddrOffsetSectionItem(Index, Result))
+ return None;
+ return Result;
}
-
- return die_offset;
+ return Value.uval;
}
-bool
-DWARFFormValue::resolveCompileUnitReferences(const DWARFCompileUnit *cu) {
+Optional<uint64_t> DWARFFormValue::getAsReference(const DWARFUnit *U) const {
+ if (!isFormClass(FC_Reference))
+ return None;
switch (Form) {
case DW_FORM_ref1:
case DW_FORM_ref2:
case DW_FORM_ref4:
case DW_FORM_ref8:
case DW_FORM_ref_udata:
- Value.uval += cu->getOffset();
- Form = DW_FORM_ref_addr;
- return true;
+ if (U == 0)
+ return None;
+ return Value.uval + U->getOffset();
+ case DW_FORM_ref_addr:
+ return Value.uval;
+ // FIXME: Add proper support for DW_FORM_ref_sig8
default:
- break;
+ return Value.uval;
}
- return false;
}
-const uint8_t *DWARFFormValue::BlockData() const {
- if (!isInlinedCStr())
- return Value.data;
- return NULL;
+Optional<uint64_t> DWARFFormValue::getAsSectionOffset() const {
+ if (!isFormClass(FC_SectionOffset))
+ return None;
+ return Value.uval;
}
-bool DWARFFormValue::isBlockForm(uint16_t form) {
- switch (form) {
- case DW_FORM_exprloc:
- case DW_FORM_block:
- case DW_FORM_block1:
- case DW_FORM_block2:
- case DW_FORM_block4:
- return true;
- }
- return false;
-}
-
-bool DWARFFormValue::isDataForm(uint16_t form) {
- switch (form) {
- case DW_FORM_sdata:
- case DW_FORM_udata:
- case DW_FORM_data1:
- case DW_FORM_data2:
- case DW_FORM_data4:
- case DW_FORM_data8:
- return true;
- }
- return false;
+Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
+ if (!isFormClass(FC_Constant) || Form == DW_FORM_sdata)
+ return None;
+ return Value.uval;
}
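The DWARFFormValue accessors now check the form class up front and report failure through Optional<> instead of sentinel return values, matching the hasValue()/getValue() pattern used by the callers above. A minimal sketch, assuming FV was extracted from unit U and handleDieOffset is a hypothetical caller-side helper:

    Optional<uint64_t> Ref = FV.getAsReference(U);
    if (Ref.hasValue())
      handleDieOffset(Ref.getValue()); // unit-relative forms already have U->getOffset() added
    Optional<const char *> Str = FV.getAsCString(U);
    if (Str.hasValue())
      outs() << Str.getValue() << "\n";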
diff --git a/lib/DebugInfo/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARFTypeUnit.cpp
new file mode 100644
index 0000000..303bf70
--- /dev/null
+++ b/lib/DebugInfo/DWARFTypeUnit.cpp
@@ -0,0 +1,39 @@
+//===-- DWARFTypeUnit.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFTypeUnit.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+bool DWARFTypeUnit::extractImpl(DataExtractor debug_info,
+ uint32_t *offset_ptr) {
+ if (!DWARFUnit::extractImpl(debug_info, offset_ptr))
+ return false;
+ TypeHash = debug_info.getU64(offset_ptr);
+ TypeOffset = debug_info.getU32(offset_ptr);
+ return TypeOffset < getLength();
+}
+
+void DWARFTypeUnit::dump(raw_ostream &OS) {
+ OS << format("0x%08x", getOffset()) << ": Type Unit:"
+ << " length = " << format("0x%08x", getLength())
+ << " version = " << format("0x%04x", getVersion())
+ << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
+ << " addr_size = " << format("0x%02x", getAddressByteSize())
+ << " type_signature = " << format("0x%16" PRIx64, TypeHash)
+ << " type_offset = " << format("0x%04x", TypeOffset)
+ << " (next unit at " << format("0x%08x", getNextUnitOffset())
+ << ")\n";
+
+ const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false);
+ assert(CU && "Null Compile Unit?");
+ CU->dump(OS, this, -1U);
+}
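The 12 extra bytes accounted for by DWARFTypeUnit::getSize() are the DWARF 4 type-unit header fields that follow the common unit header, exactly what extractImpl() reads after delegating to DWARFUnit::extractImpl(). For reference, the 32-bit DWARF layout is:

    // .debug_types unit header (32-bit DWARF):
    //   unit_length          4 bytes  \
    //   version              2 bytes   |  common header, DWARFUnit::extractImpl()
    //   debug_abbrev_offset  4 bytes   |
    //   address_size         1 byte   /
    //   type_signature       8 bytes  \   the additional 12 bytes covered by
    //   type_offset          4 bytes  /   DWARFTypeUnit::getSize()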
diff --git a/lib/DebugInfo/DWARFTypeUnit.h b/lib/DebugInfo/DWARFTypeUnit.h
new file mode 100644
index 0000000..7a0dab2
--- /dev/null
+++ b/lib/DebugInfo/DWARFTypeUnit.h
@@ -0,0 +1,35 @@
+//===-- DWARFTypeUnit.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFTYPEUNIT_H
+#define LLVM_DEBUGINFO_DWARFTYPEUNIT_H
+
+#include "DWARFUnit.h"
+
+namespace llvm {
+
+class DWARFTypeUnit : public DWARFUnit {
+private:
+ uint64_t TypeHash;
+ uint32_t TypeOffset;
+public:
+ DWARFTypeUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
+ StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
+ const RelocAddrMap *M, bool LE)
+ : DWARFUnit(DA, IS, AS, RS, SS, SOS, AOS, M, LE) {}
+ uint32_t getSize() const LLVM_OVERRIDE { return DWARFUnit::getSize() + 12; }
+ void dump(raw_ostream &OS);
+protected:
+ bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) LLVM_OVERRIDE;
+};
+
+}
+
+#endif
+
diff --git a/lib/DebugInfo/DWARFUnit.cpp b/lib/DebugInfo/DWARFUnit.cpp
new file mode 100644
index 0000000..5167eb9
--- /dev/null
+++ b/lib/DebugInfo/DWARFUnit.cpp
@@ -0,0 +1,365 @@
+//===-- DWARFUnit.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFUnit.h"
+#include "DWARFContext.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Path.h"
+#include <cstdio>
+
+using namespace llvm;
+using namespace dwarf;
+
+DWARFUnit::DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
+ StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
+ const RelocAddrMap *M, bool LE)
+ : Abbrev(DA), InfoSection(IS), AbbrevSection(AS), RangeSection(RS),
+ StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
+ RelocMap(M), isLittleEndian(LE) {
+ clear();
+}
+
+DWARFUnit::~DWARFUnit() {
+}
+
+bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
+ uint64_t &Result) const {
+ uint32_t Offset = AddrOffsetSectionBase + Index * AddrSize;
+ if (AddrOffsetSection.size() < Offset + AddrSize)
+ return false;
+ DataExtractor DA(AddrOffsetSection, isLittleEndian, AddrSize);
+ Result = DA.getAddress(&Offset);
+ return true;
+}
+
+bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
+ uint32_t &Result) const {
+ // FIXME: string offset section entries are 8-byte for DWARF64.
+ const uint32_t ItemSize = 4;
+ uint32_t Offset = Index * ItemSize;
+ if (StringOffsetSection.size() < Offset + ItemSize)
+ return false;
+ DataExtractor DA(StringOffsetSection, isLittleEndian, 0);
+ Result = DA.getU32(&Offset);
+ return true;
+}
+
+bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
+ Length = debug_info.getU32(offset_ptr);
+ Version = debug_info.getU16(offset_ptr);
+ uint64_t abbrOffset = debug_info.getU32(offset_ptr);
+ AddrSize = debug_info.getU8(offset_ptr);
+
+ bool lengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
+ bool versionOK = DWARFContext::isSupportedVersion(Version);
+ bool abbrOffsetOK = AbbrevSection.size() > abbrOffset;
+ bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
+
+ if (!lengthOK || !versionOK || !addrSizeOK || !abbrOffsetOK)
+ return false;
+
+ Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset);
+ return true;
+}
+
+bool DWARFUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
+ clear();
+
+ Offset = *offset_ptr;
+
+ if (debug_info.isValidOffset(*offset_ptr)) {
+ if (extractImpl(debug_info, offset_ptr))
+ return true;
+
+ // reset the offset to where we tried to parse from if anything went wrong
+ *offset_ptr = Offset;
+ }
+
+ return false;
+}
+
+bool DWARFUnit::extractRangeList(uint32_t RangeListOffset,
+ DWARFDebugRangeList &RangeList) const {
+ // Require that compile unit is extracted.
+ assert(DieArray.size() > 0);
+ DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize);
+ uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
+ return RangeList.extract(RangesData, &ActualRangeListOffset);
+}
+
+void DWARFUnit::clear() {
+ Offset = 0;
+ Length = 0;
+ Version = 0;
+ Abbrevs = 0;
+ AddrSize = 0;
+ BaseAddr = 0;
+ RangeSectionBase = 0;
+ AddrOffsetSectionBase = 0;
+ clearDIEs(false);
+ DWO.reset();
+}
+
+const char *DWARFUnit::getCompilationDir() {
+ extractDIEsIfNeeded(true);
+ if (DieArray.empty())
+ return 0;
+ return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0);
+}
+
+uint64_t DWARFUnit::getDWOId() {
+ extractDIEsIfNeeded(true);
+ const uint64_t FailValue = -1ULL;
+ if (DieArray.empty())
+ return FailValue;
+ return DieArray[0]
+ .getAttributeValueAsUnsignedConstant(this, DW_AT_GNU_dwo_id, FailValue);
+}
+
+void DWARFUnit::setDIERelations() {
+ if (DieArray.empty())
+ return;
+ DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front();
+ DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back();
+ DWARFDebugInfoEntryMinimal *curr_die;
+  // The loop below purposely skips the last element in the array so that
+  // there is always a valid next item to look at.
+ for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) {
+ // Since our loop doesn't include the last element, we can always
+ // safely access the next die in the array.
+ DWARFDebugInfoEntryMinimal *next_die = curr_die + 1;
+
+ const DWARFAbbreviationDeclaration *curr_die_abbrev =
+ curr_die->getAbbreviationDeclarationPtr();
+
+ if (curr_die_abbrev) {
+ // Normal DIE
+ if (curr_die_abbrev->hasChildren())
+ next_die->setParent(curr_die);
+ else
+ curr_die->setSibling(next_die);
+ } else {
+ // NULL DIE that terminates a sibling chain
+ DWARFDebugInfoEntryMinimal *parent = curr_die->getParent();
+ if (parent)
+ parent->setSibling(next_die);
+ }
+ }
+
+ // Since we skipped the last element, we need to fix it up!
+ if (die_array_begin < die_array_end)
+ curr_die->setParent(die_array_begin);
+}
+
+void DWARFUnit::extractDIEsToVector(
+ bool AppendCUDie, bool AppendNonCUDies,
+ std::vector<DWARFDebugInfoEntryMinimal> &Dies) const {
+ if (!AppendCUDie && !AppendNonCUDies)
+ return;
+
+ // Set the offset to that of the first DIE and calculate the start of the
+ // next compilation unit header.
+ uint32_t Offset = getFirstDIEOffset();
+ uint32_t NextCUOffset = getNextUnitOffset();
+ DWARFDebugInfoEntryMinimal DIE;
+ uint32_t Depth = 0;
+ bool IsCUDie = true;
+
+ while (Offset < NextCUOffset && DIE.extractFast(this, &Offset)) {
+ if (IsCUDie) {
+ if (AppendCUDie)
+ Dies.push_back(DIE);
+ if (!AppendNonCUDies)
+ break;
+      // The average DIE entry has been observed to take around 14-20 bytes,
+      // so pre-reserve the needed memory for our DIE entries
+      // accordingly.
+ Dies.reserve(Dies.size() + getDebugInfoSize() / 14);
+ IsCUDie = false;
+ } else {
+ Dies.push_back(DIE);
+ }
+
+ const DWARFAbbreviationDeclaration *AbbrDecl =
+ DIE.getAbbreviationDeclarationPtr();
+ if (AbbrDecl) {
+ // Normal DIE
+ if (AbbrDecl->hasChildren())
+ ++Depth;
+ } else {
+ // NULL DIE.
+ if (Depth > 0)
+ --Depth;
+ if (Depth == 0)
+ break; // We are done with this compile unit!
+ }
+ }
+
+ // Give a little bit of info if we encounter corrupt DWARF (our offset
+ // should always terminate at or before the start of the next compilation
+ // unit header).
+ if (Offset > NextCUOffset)
+ fprintf(stderr, "warning: DWARF compile unit extends beyond its "
+                    "bounds cu 0x%8.8x at 0x%8.8x\n", getOffset(), Offset);
+}
+
+size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
+ if ((CUDieOnly && DieArray.size() > 0) ||
+ DieArray.size() > 1)
+ return 0; // Already parsed.
+
+ bool HasCUDie = DieArray.size() > 0;
+ extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
+
+ if (DieArray.empty())
+ return 0;
+
+ // If CU DIE was just parsed, copy several attribute values from it.
+ if (!HasCUDie) {
+ uint64_t BaseAddr =
+ DieArray[0].getAttributeValueAsAddress(this, DW_AT_low_pc, -1ULL);
+ if (BaseAddr == -1ULL)
+ BaseAddr = DieArray[0].getAttributeValueAsAddress(this, DW_AT_entry_pc, 0);
+ setBaseAddress(BaseAddr);
+ AddrOffsetSectionBase = DieArray[0].getAttributeValueAsSectionOffset(
+ this, DW_AT_GNU_addr_base, 0);
+ RangeSectionBase = DieArray[0].getAttributeValueAsSectionOffset(
+ this, DW_AT_GNU_ranges_base, 0);
+ }
+
+ setDIERelations();
+ return DieArray.size();
+}
+
+DWARFUnit::DWOHolder::DWOHolder(object::ObjectFile *DWOFile)
+ : DWOFile(DWOFile),
+ DWOContext(cast<DWARFContext>(DIContext::getDWARFContext(DWOFile))),
+ DWOU(0) {
+ if (DWOContext->getNumDWOCompileUnits() > 0)
+ DWOU = DWOContext->getDWOCompileUnitAtIndex(0);
+}
+
+bool DWARFUnit::parseDWO() {
+ if (DWO.get() != 0)
+ return false;
+ extractDIEsIfNeeded(true);
+ if (DieArray.empty())
+ return false;
+ const char *DWOFileName =
+ DieArray[0].getAttributeValueAsString(this, DW_AT_GNU_dwo_name, 0);
+ if (DWOFileName == 0)
+ return false;
+ const char *CompilationDir =
+ DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0);
+ SmallString<16> AbsolutePath;
+ if (sys::path::is_relative(DWOFileName) && CompilationDir != 0) {
+ sys::path::append(AbsolutePath, CompilationDir);
+ }
+ sys::path::append(AbsolutePath, DWOFileName);
+ object::ObjectFile *DWOFile =
+ object::ObjectFile::createObjectFile(AbsolutePath);
+ if (!DWOFile)
+ return false;
+ // Reset DWOHolder.
+ DWO.reset(new DWOHolder(DWOFile));
+ DWARFUnit *DWOCU = DWO->getUnit();
+ // Verify that compile unit in .dwo file is valid.
+ if (DWOCU == 0 || DWOCU->getDWOId() != getDWOId()) {
+ DWO.reset();
+ return false;
+ }
+ // Share .debug_addr and .debug_ranges section with compile unit in .dwo
+ DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase);
+ DWOCU->setRangesSection(RangeSection, RangeSectionBase);
+ return true;
+}
+
+void DWARFUnit::clearDIEs(bool KeepCUDie) {
+ if (DieArray.size() > (unsigned)KeepCUDie) {
+    // std::vector never actually shrinks its allocation: when clear() or
+    // erase() are called, or the vector is resized to a smaller size, the
+    // reported size gets smaller but the allocated memory remains intact
+    // (call capacity() to see this). To really free it, we create a
+    // temporary vector and swap the contents, which only swaps the
+    // internal pointers; when the temporary goes out of scope, it
+    // destroys the old contents and releases the storage.
+ std::vector<DWARFDebugInfoEntryMinimal> TmpArray;
+ DieArray.swap(TmpArray);
+ // Save at least the compile unit DIE
+ if (KeepCUDie)
+ DieArray.push_back(TmpArray.front());
+ }
+}
+
+void
+DWARFUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
+ bool clear_dies_if_already_not_parsed,
+ uint32_t CUOffsetInAranges) {
+  // This function is usually called when there is no .debug_aranges section
+  // and an accurate compile-unit-level set of address ranges needs to be
+  // built. If the DIEs had not been parsed yet, we do not want the DIEs of
+  // every compile unit to stay loaded just for this, so we may end up
+  // parsing the DWARF here and then throwing it all away again to keep
+  // memory usage down.
+ const bool clear_dies = extractDIEsIfNeeded(false) > 1 &&
+ clear_dies_if_already_not_parsed;
+ DieArray[0].buildAddressRangeTable(this, debug_aranges, CUOffsetInAranges);
+ bool DWOCreated = parseDWO();
+ if (DWO.get()) {
+ // If there is a .dwo file for this compile unit, then skeleton CU DIE
+ // doesn't have children, and we should instead build address range table
+ // from DIEs in the .debug_info.dwo section of .dwo file.
+ DWO->getUnit()->buildAddressRangeTable(
+ debug_aranges, clear_dies_if_already_not_parsed, CUOffsetInAranges);
+ }
+ if (DWOCreated && clear_dies_if_already_not_parsed)
+ DWO.reset();
+
+  // Keep memory usage down by clearing the DIEs if this call to the
+  // function caused them to be parsed.
+ if (clear_dies)
+ clearDIEs(true);
+}
+
+const DWARFDebugInfoEntryMinimal *
+DWARFUnit::getSubprogramForAddress(uint64_t Address) {
+ extractDIEsIfNeeded(false);
+ for (size_t i = 0, n = DieArray.size(); i != n; i++)
+ if (DieArray[i].isSubprogramDIE() &&
+ DieArray[i].addressRangeContainsAddress(this, Address)) {
+ return &DieArray[i];
+ }
+ return 0;
+}
+
+DWARFDebugInfoEntryInlinedChain
+DWARFUnit::getInlinedChainForAddress(uint64_t Address) {
+ // First, find a subprogram that contains the given address (the root
+ // of inlined chain).
+ const DWARFUnit *ChainCU = 0;
+ const DWARFDebugInfoEntryMinimal *SubprogramDIE =
+ getSubprogramForAddress(Address);
+ if (SubprogramDIE) {
+ ChainCU = this;
+ } else {
+ // Try to look for subprogram DIEs in the DWO file.
+ parseDWO();
+ if (DWO.get()) {
+ SubprogramDIE = DWO->getUnit()->getSubprogramForAddress(Address);
+ if (SubprogramDIE)
+ ChainCU = DWO->getUnit();
+ }
+ }
+
+ // Get inlined chain rooted at this subprogram DIE.
+ if (!SubprogramDIE)
+ return DWARFDebugInfoEntryInlinedChain();
+ return SubprogramDIE->getInlinedChainForAddress(ChainCU, Address);
+}
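
clearDIEs() above relies on the classic swap-with-a-temporary trick, since clearing a std::vector does not release its allocation. A standalone sketch of the effect, using a plain vector of int in place of the DIE array:

// Standalone sketch: clear() keeps the vector's capacity, while swapping
// with a temporary hands the storage to an object that is destroyed soon
// after, releasing the memory.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> DieArray(100000, 0);
  DieArray.clear();
  std::printf("after clear(): size=%lu capacity=%lu\n",
              (unsigned long)DieArray.size(),
              (unsigned long)DieArray.capacity());

  std::vector<int> Tmp;
  DieArray.swap(Tmp);  // Tmp now owns the big allocation and will free it
                       // when it is destroyed at the end of main().
  std::printf("after swap():  size=%lu capacity=%lu\n",
              (unsigned long)DieArray.size(),
              (unsigned long)DieArray.capacity());
  return 0;
}
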
diff --git a/lib/DebugInfo/DWARFUnit.h b/lib/DebugInfo/DWARFUnit.h
new file mode 100644
index 0000000..bd768a6
--- /dev/null
+++ b/lib/DebugInfo/DWARFUnit.h
@@ -0,0 +1,168 @@
+//===-- DWARFUnit.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFUNIT_H
+#define LLVM_DEBUGINFO_DWARFUNIT_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "DWARFDebugAbbrev.h"
+#include "DWARFDebugInfoEntry.h"
+#include "DWARFDebugRangeList.h"
+#include "DWARFRelocMap.h"
+#include <vector>
+
+namespace llvm {
+
+namespace object {
+class ObjectFile;
+}
+
+class DWARFDebugAbbrev;
+class StringRef;
+class raw_ostream;
+
+class DWARFUnit {
+ const DWARFDebugAbbrev *Abbrev;
+ StringRef InfoSection;
+ StringRef AbbrevSection;
+ StringRef RangeSection;
+ uint32_t RangeSectionBase;
+ StringRef StringSection;
+ StringRef StringOffsetSection;
+ StringRef AddrOffsetSection;
+ uint32_t AddrOffsetSectionBase;
+ const RelocAddrMap *RelocMap;
+ bool isLittleEndian;
+
+ uint32_t Offset;
+ uint32_t Length;
+ uint16_t Version;
+ const DWARFAbbreviationDeclarationSet *Abbrevs;
+ uint8_t AddrSize;
+ uint64_t BaseAddr;
+ // The compile unit debug information entry items.
+ std::vector<DWARFDebugInfoEntryMinimal> DieArray;
+
+ class DWOHolder {
+ OwningPtr<object::ObjectFile> DWOFile;
+ OwningPtr<DWARFContext> DWOContext;
+ DWARFUnit *DWOU;
+ public:
+ DWOHolder(object::ObjectFile *DWOFile);
+ DWARFUnit *getUnit() const { return DWOU; }
+ };
+ OwningPtr<DWOHolder> DWO;
+
+protected:
+ virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr);
+
+public:
+
+ DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
+ StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
+ const RelocAddrMap *M, bool LE);
+
+ virtual ~DWARFUnit();
+
+ StringRef getStringSection() const { return StringSection; }
+ StringRef getStringOffsetSection() const { return StringOffsetSection; }
+ void setAddrOffsetSection(StringRef AOS, uint32_t Base) {
+ AddrOffsetSection = AOS;
+ AddrOffsetSectionBase = Base;
+ }
+ void setRangesSection(StringRef RS, uint32_t Base) {
+ RangeSection = RS;
+ RangeSectionBase = Base;
+ }
+
+ bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
+ // FIXME: Result should be uint64_t in DWARF64.
+ bool getStringOffsetSectionItem(uint32_t Index, uint32_t &Result) const;
+
+ DataExtractor getDebugInfoExtractor() const {
+ return DataExtractor(InfoSection, isLittleEndian, AddrSize);
+ }
+ DataExtractor getStringExtractor() const {
+ return DataExtractor(StringSection, false, 0);
+ }
+
+ const RelocAddrMap *getRelocMap() const { return RelocMap; }
+
+ bool extract(DataExtractor debug_info, uint32_t* offset_ptr);
+
+ /// extractRangeList - extracts the range list referenced by this compile
+ /// unit from .debug_ranges section. Returns true on success.
+ /// Requires that compile unit is already extracted.
+ bool extractRangeList(uint32_t RangeListOffset,
+ DWARFDebugRangeList &RangeList) const;
+ void clear();
+ uint32_t getOffset() const { return Offset; }
+ /// Size in bytes of the compile unit header.
+ virtual uint32_t getSize() const { return 11; }
+ uint32_t getFirstDIEOffset() const { return Offset + getSize(); }
+ uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
+ /// Size in bytes of the .debug_info data associated with this compile unit.
+ size_t getDebugInfoSize() const { return Length + 4 - getSize(); }
+ uint32_t getLength() const { return Length; }
+ uint16_t getVersion() const { return Version; }
+ const DWARFAbbreviationDeclarationSet *getAbbreviations() const {
+ return Abbrevs;
+ }
+ uint8_t getAddressByteSize() const { return AddrSize; }
+ uint64_t getBaseAddress() const { return BaseAddr; }
+
+ void setBaseAddress(uint64_t base_addr) {
+ BaseAddr = base_addr;
+ }
+
+ const DWARFDebugInfoEntryMinimal *
+ getCompileUnitDIE(bool extract_cu_die_only = true) {
+ extractDIEsIfNeeded(extract_cu_die_only);
+ return DieArray.empty() ? NULL : &DieArray[0];
+ }
+
+ const char *getCompilationDir();
+ uint64_t getDWOId();
+
+ void buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
+ bool clear_dies_if_already_not_parsed,
+ uint32_t CUOffsetInAranges);
+
+ /// getInlinedChainForAddress - fetches inlined chain for a given address.
+ /// Returns empty chain if there is no subprogram containing address. The
+ /// chain is valid as long as parsed compile unit DIEs are not cleared.
+ DWARFDebugInfoEntryInlinedChain getInlinedChainForAddress(uint64_t Address);
+
+private:
+ /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
+ /// hasn't already been done. Returns the number of DIEs parsed at this call.
+ size_t extractDIEsIfNeeded(bool CUDieOnly);
+ /// extractDIEsToVector - Appends all parsed DIEs to a vector.
+ void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
+ std::vector<DWARFDebugInfoEntryMinimal> &DIEs) const;
+ /// setDIERelations - We read in all of the DIE entries into our flat list
+ /// of DIE entries and now we need to go back through all of them and set the
+ /// parent, sibling and child pointers for quick DIE navigation.
+ void setDIERelations();
+ /// clearDIEs - Clear parsed DIEs to keep memory usage low.
+ void clearDIEs(bool KeepCUDie);
+
+ /// parseDWO - Parses .dwo file for current compile unit. Returns true if
+ /// it was actually constructed.
+ bool parseDWO();
+
+ /// getSubprogramForAddress - Returns subprogram DIE with address range
+ /// encompassing the provided address. The pointer is alive as long as parsed
+ /// compile unit DIEs are not cleared.
+ const DWARFDebugInfoEntryMinimal *getSubprogramForAddress(uint64_t Address);
+};
+
+}
+
+#endif
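
As a worked example of the header arithmetic declared above (getSize(), getFirstDIEOffset(), getNextUnitOffset(), getDebugInfoSize()): in DWARF32 the initial length field is itself 4 bytes and is not counted in Length, so the next unit starts at Offset + Length + 4 and the DIE data spans Length + 4 - getSize() bytes. The numbers below are hypothetical.

// Standalone worked example of the compile unit header arithmetic.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t HeaderSize = 11;  // 4 length + 2 version + 4 abbrev offset + 1 addr size
  const uint32_t Offset = 0x100;   // where this unit starts in .debug_info
  const uint32_t Length = 0x54;    // value of the unit's length field

  uint32_t FirstDIEOffset = Offset + HeaderSize;
  uint32_t NextUnitOffset = Offset + Length + 4;
  uint32_t DebugInfoSize  = Length + 4 - HeaderSize;

  // The DIE data is exactly what lies between the header and the next unit.
  assert(NextUnitOffset - FirstDIEOffset == DebugInfoSize);
  std::printf("first DIE at 0x%x, next unit at 0x%x, %u bytes of DIE data\n",
              (unsigned)FirstDIEOffset, (unsigned)NextUnitOffset,
              (unsigned)DebugInfoSize);
  return 0;
}
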
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index c463e9f..2a610d5 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "jit"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ExecutionEngine/GenericValue.h"
@@ -39,6 +40,11 @@ using namespace llvm;
STATISTIC(NumInitBytes, "Number of bytes of global vars initialized");
STATISTIC(NumGlobals , "Number of global vars initialized");
+// Pin the vtable to this file.
+void ObjectCache::anchor() {}
+void ObjectBuffer::anchor() {}
+void ObjectBufferStream::anchor() {}
+
ExecutionEngine *(*ExecutionEngine::JITCtor)(
Module *M,
std::string *ErrorStr,
@@ -56,9 +62,7 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
ExecutionEngine::ExecutionEngine(Module *M)
: EEState(*this),
- LazyFunctionCreator(0),
- ExceptionTableRegister(0),
- ExceptionTableDeregister(0) {
+ LazyFunctionCreator(0) {
CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
@@ -72,16 +76,6 @@ ExecutionEngine::~ExecutionEngine() {
delete Modules[i];
}
-void ExecutionEngine::DeregisterAllTables() {
- if (ExceptionTableDeregister) {
- DenseMap<const Function*, void*>::iterator it = AllExceptionTables.begin();
- DenseMap<const Function*, void*>::iterator ite = AllExceptionTables.end();
- for (; it != ite; ++it)
- ExceptionTableDeregister(it->second);
- AllExceptionTables.clear();
- }
-}
-
namespace {
/// \brief Helper class which uses a value handler to automatically delete the
/// memory block when the GlobalVariable is destroyed.
@@ -556,6 +550,24 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
// with the correct bit width.
Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
break;
+ case Type::StructTyID: {
+ // if the whole struct is 'undef' just reserve memory for the value.
+ if(StructType *STy = dyn_cast<StructType>(C->getType())) {
+ unsigned int elemNum = STy->getNumElements();
+ Result.AggregateVal.resize(elemNum);
+ for (unsigned int i = 0; i < elemNum; ++i) {
+ Type *ElemTy = STy->getElementType(i);
+ if (ElemTy->isIntegerTy())
+ Result.AggregateVal[i].IntVal =
+ APInt(ElemTy->getPrimitiveSizeInBits(), 0);
+ else if (ElemTy->isAggregateType()) {
+ const Constant *ElemUndef = UndefValue::get(ElemTy);
+ Result.AggregateVal[i] = getConstantValue(ElemUndef);
+ }
+ }
+ }
+ }
+ break;
case Type::VectorTyID:
// if the whole vector is 'undef' just reserve memory for the value.
const VectorType* VTy = dyn_cast<VectorType>(C->getType());
@@ -564,7 +576,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
Result.AggregateVal.resize(elemNum);
if (ElemTy->isIntegerTy())
for (unsigned int i = 0; i < elemNum; ++i)
- Result.AggregateVal[i].IntVal =
+ Result.AggregateVal[i].IntVal =
APInt(ElemTy->getPrimitiveSizeInBits(), 0);
break;
}
@@ -1283,6 +1295,10 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
if (GA == 0) {
// If it's not already specified, allocate memory for the global.
GA = getMemoryForGV(GV);
+
+ // If we failed to allocate memory for this global, return.
+ if (GA == 0) return;
+
addGlobalMapping(GV, GA);
}
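
The new StructTyID case above zero-fills an undef struct by recursing into nested aggregates. Below is a standalone sketch of that recursion; Value here is a stand-in for GenericValue (which likewise stores aggregate elements in a vector of itself), and the struct layout built in main() is hypothetical.

// Standalone sketch: integer elements become zero, nested aggregates recurse.
#include <cstdio>
#include <vector>

struct Value {
  long Int;
  std::vector<Value> Aggregate;
  Value() : Int(0) {}
};

// Zero-initialize a struct whose element i is a plain integer when
// ElemCounts[i] == 0, and a nested struct with that many integer fields
// otherwise.
static Value zeroInitStruct(const std::vector<unsigned> &ElemCounts) {
  Value V;
  V.Aggregate.resize(ElemCounts.size());
  for (size_t i = 0; i != ElemCounts.size(); ++i) {
    if (ElemCounts[i] == 0) {
      V.Aggregate[i].Int = 0;                  // integer element
    } else {
      std::vector<unsigned> Inner(ElemCounts[i], 0);
      V.Aggregate[i] = zeroInitStruct(Inner);  // nested aggregate: recurse
    }
  }
  return V;
}

int main() {
  // Hypothetical layout: { i32, { i32, i32 } }
  std::vector<unsigned> Counts;
  Counts.push_back(0);  // plain integer
  Counts.push_back(2);  // nested struct with two integers
  Value V = zeroInitStruct(Counts);
  std::printf("outer has %lu elements, nested has %lu\n",
              (unsigned long)V.Aggregate.size(),
              (unsigned long)V.Aggregate[1].Aggregate.size());
  return 0;
}
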
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 88e73bf..2d34eea 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -339,14 +339,10 @@ void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
namespace {
struct SimpleBindingMMFunctions {
- uint8_t *(*AllocateCodeSection)(void *Opaque,
- uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
- uint8_t *(*AllocateDataSection)(void *Opaque,
- uintptr_t Size, unsigned Alignment,
- unsigned SectionID, LLVMBool IsReadOnly);
- LLVMBool (*FinalizeMemory)(void *Opaque, char **ErrMsg);
- void (*Destroy)(void *Opaque);
+ LLVMMemoryManagerAllocateCodeSectionCallback AllocateCodeSection;
+ LLVMMemoryManagerAllocateDataSectionCallback AllocateDataSection;
+ LLVMMemoryManagerFinalizeMemoryCallback FinalizeMemory;
+ LLVMMemoryManagerDestroyCallback Destroy;
};
class SimpleBindingMemoryManager : public RTDyldMemoryManager {
@@ -355,12 +351,13 @@ public:
void *Opaque);
virtual ~SimpleBindingMemoryManager();
- virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
+ virtual uint8_t *allocateCodeSection(
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName);
- virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID,
- bool isReadOnly);
+ virtual uint8_t *allocateDataSection(
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName, bool isReadOnly);
virtual bool finalizeMemory(std::string *ErrMsg);
@@ -388,13 +385,17 @@ SimpleBindingMemoryManager::~SimpleBindingMemoryManager() {
}
uint8_t *SimpleBindingMemoryManager::allocateCodeSection(
- uintptr_t Size, unsigned Alignment, unsigned SectionID) {
- return Functions.AllocateCodeSection(Opaque, Size, Alignment, SectionID);
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName) {
+ return Functions.AllocateCodeSection(Opaque, Size, Alignment, SectionID,
+ SectionName.str().c_str());
}
uint8_t *SimpleBindingMemoryManager::allocateDataSection(
- uintptr_t Size, unsigned Alignment, unsigned SectionID, bool isReadOnly) {
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName, bool isReadOnly) {
return Functions.AllocateDataSection(Opaque, Size, Alignment, SectionID,
+ SectionName.str().c_str(),
isReadOnly);
}
@@ -415,14 +416,10 @@ bool SimpleBindingMemoryManager::finalizeMemory(std::string *ErrMsg) {
LLVMMCJITMemoryManagerRef LLVMCreateSimpleMCJITMemoryManager(
void *Opaque,
- uint8_t *(*AllocateCodeSection)(void *Opaque,
- uintptr_t Size, unsigned Alignment,
- unsigned SectionID),
- uint8_t *(*AllocateDataSection)(void *Opaque,
- uintptr_t Size, unsigned Alignment,
- unsigned SectionID, LLVMBool IsReadOnly),
- LLVMBool (*FinalizeMemory)(void *Opaque, char **ErrMsg),
- void (*Destroy)(void *Opaque)) {
+ LLVMMemoryManagerAllocateCodeSectionCallback AllocateCodeSection,
+ LLVMMemoryManagerAllocateDataSectionCallback AllocateDataSection,
+ LLVMMemoryManagerFinalizeMemoryCallback FinalizeMemory,
+ LLVMMemoryManagerDestroyCallback Destroy) {
if (!AllocateCodeSection || !AllocateDataSection || !FinalizeMemory ||
!Destroy)
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
index 3d9ff53..777d0f1 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -61,7 +61,7 @@ public:
GetNewMethodIDFunc(GetNewMethodIDImpl) {
}
- // Sends an event anncouncing that a function has been emitted
+ // Sends an event announcing that a function has been emitted
// return values are event-specific. See Intel documentation for details.
int iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
if (!NotifyEventFunc)
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index fc3d579..5de0659 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -786,20 +786,31 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
}
static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2,
- GenericValue Src3) {
- return Src1.IntVal == 0 ? Src3 : Src2;
+ GenericValue Src3, const Type *Ty) {
+ GenericValue Dest;
+ if(Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ assert(Src2.AggregateVal.size() == Src3.AggregateVal.size());
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+ for (size_t i = 0; i < Src1.AggregateVal.size(); ++i)
+ Dest.AggregateVal[i] = (Src1.AggregateVal[i].IntVal == 0) ?
+ Src3.AggregateVal[i] : Src2.AggregateVal[i];
+ } else {
+ Dest = (Src1.IntVal == 0) ? Src3 : Src2;
+ }
+ return Dest;
}
void Interpreter::visitSelectInst(SelectInst &I) {
ExecutionContext &SF = ECStack.back();
+ const Type * Ty = I.getOperand(0)->getType();
GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
- GenericValue R = executeSelectInst(Src1, Src2, Src3);
+ GenericValue R = executeSelectInst(Src1, Src2, Src3, Ty);
SetValue(&I, R, SF);
}
-
//===----------------------------------------------------------------------===//
// Terminator Instruction Implementations
//===----------------------------------------------------------------------===//
@@ -887,40 +898,11 @@ void Interpreter::visitSwitchInst(SwitchInst &I) {
// Check to see if any of the cases match...
BasicBlock *Dest = 0;
for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
- IntegersSubset& Case = i.getCaseValueEx();
- if (Case.isSingleNumber()) {
- // FIXME: Currently work with ConstantInt based numbers.
- const ConstantInt *CI = Case.getSingleNumber(0).toConstantInt();
- GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF);
- if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) {
- Dest = cast<BasicBlock>(i.getCaseSuccessor());
- break;
- }
+ GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF);
+ if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
+ Dest = cast<BasicBlock>(i.getCaseSuccessor());
+ break;
}
- if (Case.isSingleNumbersOnly()) {
- for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) {
- // FIXME: Currently work with ConstantInt based numbers.
- const ConstantInt *CI = Case.getSingleNumber(n).toConstantInt();
- GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF);
- if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) {
- Dest = cast<BasicBlock>(i.getCaseSuccessor());
- break;
- }
- }
- } else
- for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) {
- IntegersSubset::Range r = Case.getItem(n);
- // FIXME: Currently work with ConstantInt based numbers.
- const ConstantInt *LowCI = r.getLow().toConstantInt();
- const ConstantInt *HighCI = r.getHigh().toConstantInt();
- GenericValue Low = getOperandValue(const_cast<ConstantInt*>(LowCI), SF);
- GenericValue High = getOperandValue(const_cast<ConstantInt*>(HighCI), SF);
- if (executeICMP_ULE(Low, CondVal, ElTy).IntVal != 0 &&
- executeICMP_ULE(CondVal, High, ElTy).IntVal != 0) {
- Dest = cast<BasicBlock>(i.getCaseSuccessor());
- break;
- }
- }
}
if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default
SwitchToNewBasicBlock(Dest, SF);
@@ -1793,10 +1775,204 @@ void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
SetValue(&I, Dest, SF);
}
+void Interpreter::visitInsertElementInst(InsertElementInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ Type *Ty = I.getType();
+
+ if(!(Ty->isVectorTy()) )
+ llvm_unreachable("Unhandled dest type for insertelement instruction");
+
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
+ GenericValue Dest;
+
+ Type *TyContained = Ty->getContainedType(0);
+
+ const unsigned indx = unsigned(Src3.IntVal.getZExtValue());
+ Dest.AggregateVal = Src1.AggregateVal;
+
+ if(Src1.AggregateVal.size() <= indx)
+ llvm_unreachable("Invalid index in insertelement instruction");
+ switch (TyContained->getTypeID()) {
+ default:
+ llvm_unreachable("Unhandled dest type for insertelement instruction");
+ case Type::IntegerTyID:
+ Dest.AggregateVal[indx].IntVal = Src2.IntVal;
+ break;
+ case Type::FloatTyID:
+ Dest.AggregateVal[indx].FloatVal = Src2.FloatVal;
+ break;
+ case Type::DoubleTyID:
+ Dest.AggregateVal[indx].DoubleVal = Src2.DoubleVal;
+ break;
+ }
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitShuffleVectorInst(ShuffleVectorInst &I){
+ ExecutionContext &SF = ECStack.back();
+
+ Type *Ty = I.getType();
+ if(!(Ty->isVectorTy()))
+ llvm_unreachable("Unhandled dest type for shufflevector instruction");
+
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Src3 = getOperandValue(I.getOperand(2), SF);
+ GenericValue Dest;
+
+ // There is no need to check types of src1 and src2, because the compiled
+ // bytecode can't contain different types for src1 and src2 for a
+ // shufflevector instruction.
+
+ Type *TyContained = Ty->getContainedType(0);
+ unsigned src1Size = (unsigned)Src1.AggregateVal.size();
+ unsigned src2Size = (unsigned)Src2.AggregateVal.size();
+ unsigned src3Size = (unsigned)Src3.AggregateVal.size();
+
+ Dest.AggregateVal.resize(src3Size);
+
+ switch (TyContained->getTypeID()) {
+ default:
+ llvm_unreachable("Unhandled dest type for insertelement instruction");
+ break;
+ case Type::IntegerTyID:
+ for( unsigned i=0; i<src3Size; i++) {
+ unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue();
+ if(j < src1Size)
+ Dest.AggregateVal[i].IntVal = Src1.AggregateVal[j].IntVal;
+ else if(j < src1Size + src2Size)
+ Dest.AggregateVal[i].IntVal = Src2.AggregateVal[j-src1Size].IntVal;
+ else
+          // A selector index must not exceed the combined length of the first
+          // and second operands, and well-formed IR should never contain
+          //   %tmp = shufflevector <2 x i32> <i32 3, i32 4>, <2 x i32> undef,
+          //                        <2 x i32> < i32 0, i32 5 >,
+          // where i32 5 is out of range, but keep this as an extra check here:
+ llvm_unreachable("Invalid mask in shufflevector instruction");
+ }
+ break;
+ case Type::FloatTyID:
+ for( unsigned i=0; i<src3Size; i++) {
+ unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue();
+ if(j < src1Size)
+ Dest.AggregateVal[i].FloatVal = Src1.AggregateVal[j].FloatVal;
+ else if(j < src1Size + src2Size)
+ Dest.AggregateVal[i].FloatVal = Src2.AggregateVal[j-src1Size].FloatVal;
+ else
+ llvm_unreachable("Invalid mask in shufflevector instruction");
+ }
+ break;
+ case Type::DoubleTyID:
+ for( unsigned i=0; i<src3Size; i++) {
+ unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue();
+ if(j < src1Size)
+ Dest.AggregateVal[i].DoubleVal = Src1.AggregateVal[j].DoubleVal;
+ else if(j < src1Size + src2Size)
+ Dest.AggregateVal[i].DoubleVal =
+ Src2.AggregateVal[j-src1Size].DoubleVal;
+ else
+ llvm_unreachable("Invalid mask in shufflevector instruction");
+ }
+ break;
+ }
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitExtractValueInst(ExtractValueInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ Value *Agg = I.getAggregateOperand();
+ GenericValue Dest;
+ GenericValue Src = getOperandValue(Agg, SF);
+
+ ExtractValueInst::idx_iterator IdxBegin = I.idx_begin();
+ unsigned Num = I.getNumIndices();
+ GenericValue *pSrc = &Src;
+
+ for (unsigned i = 0 ; i < Num; ++i) {
+ pSrc = &pSrc->AggregateVal[*IdxBegin];
+ ++IdxBegin;
+ }
+
+ Type *IndexedType = ExtractValueInst::getIndexedType(Agg->getType(), I.getIndices());
+ switch (IndexedType->getTypeID()) {
+ default:
+ llvm_unreachable("Unhandled dest type for extractelement instruction");
+ break;
+ case Type::IntegerTyID:
+ Dest.IntVal = pSrc->IntVal;
+ break;
+ case Type::FloatTyID:
+ Dest.FloatVal = pSrc->FloatVal;
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = pSrc->DoubleVal;
+ break;
+ case Type::ArrayTyID:
+ case Type::StructTyID:
+ case Type::VectorTyID:
+ Dest.AggregateVal = pSrc->AggregateVal;
+ break;
+ case Type::PointerTyID:
+ Dest.PointerVal = pSrc->PointerVal;
+ break;
+ }
+
+ SetValue(&I, Dest, SF);
+}
+
+void Interpreter::visitInsertValueInst(InsertValueInst &I) {
+
+ ExecutionContext &SF = ECStack.back();
+ Value *Agg = I.getAggregateOperand();
+
+ GenericValue Src1 = getOperandValue(Agg, SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest = Src1; // Dest is a slightly changed Src1
+
+ ExtractValueInst::idx_iterator IdxBegin = I.idx_begin();
+ unsigned Num = I.getNumIndices();
+
+ GenericValue *pDest = &Dest;
+ for (unsigned i = 0 ; i < Num; ++i) {
+ pDest = &pDest->AggregateVal[*IdxBegin];
+ ++IdxBegin;
+ }
+ // pDest points to the target value in the Dest now
+
+ Type *IndexedType = ExtractValueInst::getIndexedType(Agg->getType(), I.getIndices());
+
+ switch (IndexedType->getTypeID()) {
+ default:
+ llvm_unreachable("Unhandled dest type for insertelement instruction");
+ break;
+ case Type::IntegerTyID:
+ pDest->IntVal = Src2.IntVal;
+ break;
+ case Type::FloatTyID:
+ pDest->FloatVal = Src2.FloatVal;
+ break;
+ case Type::DoubleTyID:
+ pDest->DoubleVal = Src2.DoubleVal;
+ break;
+ case Type::ArrayTyID:
+ case Type::StructTyID:
+ case Type::VectorTyID:
+ pDest->AggregateVal = Src2.AggregateVal;
+ break;
+ case Type::PointerTyID:
+ pDest->PointerVal = Src2.PointerVal;
+ break;
+ }
+
+ SetValue(&I, Dest, SF);
+}
+
GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
ExecutionContext &SF) {
switch (CE->getOpcode()) {
- case Instruction::Trunc:
+ case Instruction::Trunc:
return executeTruncInst(CE->getOperand(0), CE->getType(), SF);
case Instruction::ZExt:
return executeZExtInst(CE->getOperand(0), CE->getType(), SF);
@@ -1832,7 +2008,8 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
case Instruction::Select:
return executeSelectInst(getOperandValue(CE->getOperand(0), SF),
getOperandValue(CE->getOperand(1), SF),
- getOperandValue(CE->getOperand(2), SF));
+ getOperandValue(CE->getOperand(2), SF),
+ CE->getOperand(0)->getType());
default :
break;
}
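
The interpreter changes above implement shufflevector by treating each mask entry as an index into the two source vectors laid end to end: index j selects element j of the first source when j is less than its length, and element j minus that length of the second source otherwise. A standalone sketch of that mapping over plain std::vector inputs (the values are hypothetical):

// Standalone sketch of the shufflevector index mapping.
#include <cstdio>
#include <vector>

static std::vector<int> shuffle(const std::vector<int> &A,
                                const std::vector<int> &B,
                                const std::vector<unsigned> &Mask) {
  std::vector<int> Out(Mask.size());
  for (size_t i = 0; i != Mask.size(); ++i) {
    unsigned j = Mask[i];
    if (j < A.size())
      Out[i] = A[j];                       // element from the first source
    else if (j < A.size() + B.size())
      Out[i] = B[j - A.size()];            // element from the second source
    else
      return std::vector<int>();           // out-of-range selector: reject
  }
  return Out;
}

int main() {
  std::vector<int> A, B;
  A.push_back(10); A.push_back(20);
  B.push_back(30); B.push_back(40);
  std::vector<unsigned> Mask;
  Mask.push_back(0); Mask.push_back(3);    // picks A[0] and B[1]
  std::vector<int> R = shuffle(A, B, Mask);
  std::printf("%d %d\n", R[0], R[1]);      // prints: 10 40
  return 0;
}
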
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index bef4bbf..a03c7f5 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -406,6 +406,7 @@ GenericValue lle_X_sprintf(FunctionType *FT,
break;
}
}
+ return GV;
}
// int printf(const char *, ...) - a very rough implementation to make output
@@ -434,7 +435,7 @@ GenericValue lle_X_sscanf(FunctionType *FT,
GenericValue GV;
GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
- Args[5], Args[6], Args[7], Args[8], Args[9]));
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
return GV;
}
@@ -450,7 +451,7 @@ GenericValue lle_X_scanf(FunctionType *FT,
GenericValue GV;
GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
- Args[5], Args[6], Args[7], Args[8], Args[9]));
+ Args[5], Args[6], Args[7], Args[8], Args[9]));
return GV;
}
@@ -470,6 +471,30 @@ GenericValue lle_X_fprintf(FunctionType *FT,
return GV;
}
+static GenericValue lle_X_memset(FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ int val = (int)Args[1].IntVal.getSExtValue();
+ size_t len = (size_t)Args[2].IntVal.getZExtValue();
+ memset((void *)GVTOP(Args[0]), val, len);
+ // llvm.memset.* returns void, lle_X_* returns GenericValue,
+ // so here we return GenericValue with IntVal set to zero
+ GenericValue GV;
+ GV.IntVal = 0;
+ return GV;
+}
+
+static GenericValue lle_X_memcpy(FunctionType *FT,
+ const std::vector<GenericValue> &Args) {
+ memcpy(GVTOP(Args[0]), GVTOP(Args[1]),
+ (size_t)(Args[2].IntVal.getLimitedValue()));
+
+ // llvm.memcpy* returns void, lle_X_* returns GenericValue,
+ // so here we return GenericValue with IntVal set to zero
+ GenericValue GV;
+ GV.IntVal = 0;
+ return GV;
+}
+
void Interpreter::initializeExternalFunctions() {
sys::ScopedLock Writer(*FunctionsLock);
FuncNames["lle_X_atexit"] = lle_X_atexit;
@@ -481,4 +506,6 @@ void Interpreter::initializeExternalFunctions() {
FuncNames["lle_X_sscanf"] = lle_X_sscanf;
FuncNames["lle_X_scanf"] = lle_X_scanf;
FuncNames["lle_X_fprintf"] = lle_X_fprintf;
+ FuncNames["lle_X_memset"] = lle_X_memset;
+ FuncNames["lle_X_memcpy"] = lle_X_memcpy;
}
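
The new lle_X_memset and lle_X_memcpy wrappers above are registered in a name-to-function table, the same dispatch scheme the interpreter uses for all external calls. Below is a standalone sketch of that table; the wrapper signature and the names are illustrative, not the interpreter's actual types.

// Standalone sketch of a name-to-wrapper dispatch table.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

typedef long (*ExtFn)(const std::vector<long> &Args);

static long ext_add(const std::vector<long> &Args) { return Args[0] + Args[1]; }
static long ext_neg(const std::vector<long> &Args) { return -Args[0]; }

int main() {
  std::map<std::string, ExtFn> FuncNames;
  // Registration mirrors the initializeExternalFunctions() pattern above.
  FuncNames["lle_X_add"] = ext_add;
  FuncNames["lle_X_neg"] = ext_neg;

  std::vector<long> Args;
  Args.push_back(2); Args.push_back(3);
  std::map<std::string, ExtFn>::iterator It = FuncNames.find("lle_X_add");
  if (It != FuncNames.end())
    std::printf("result = %ld\n", It->second(Args));  // prints: result = 5
  return 0;
}
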
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 2952d7e..98269ef 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -179,6 +179,12 @@ public:
void visitVAArgInst(VAArgInst &I);
void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+
+ void visitExtractValueInst(ExtractValueInst &I);
+ void visitInsertValueInst(InsertValueInst &I);
+
void visitInstruction(Instruction &I) {
errs() << I << "\n";
llvm_unreachable("Instruction not interpretable yet!");
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 53ea0a2..246a675 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -67,140 +67,6 @@ static struct RegisterJIT {
extern "C" void LLVMLinkInJIT() {
}
-// Determine whether we can register EH tables.
-#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
- !defined(__USING_SJLJ_EXCEPTIONS__))
-#define HAVE_EHTABLE_SUPPORT 1
-#else
-#define HAVE_EHTABLE_SUPPORT 0
-#endif
-
-#if HAVE_EHTABLE_SUPPORT
-
-// libgcc defines the __register_frame function to dynamically register new
-// dwarf frames for exception handling. This functionality is not portable
-// across compilers and is only provided by GCC. We use the __register_frame
-// function here so that code generated by the JIT cooperates with the unwinding
-// runtime of libgcc. When JITting with exception handling enable, LLVM
-// generates dwarf frames and registers it to libgcc with __register_frame.
-//
-// The __register_frame function works with Linux.
-//
-// Unfortunately, this functionality seems to be in libgcc after the unwinding
-// library of libgcc for darwin was written. The code for darwin overwrites the
-// value updated by __register_frame with a value fetched with "keymgr".
-// "keymgr" is an obsolete functionality, which should be rewritten some day.
-// In the meantime, since "keymgr" is on all libgccs shipped with apple-gcc, we
-// need a workaround in LLVM which uses the "keymgr" to dynamically modify the
-// values of an opaque key, used by libgcc to find dwarf tables.
-
-extern "C" void __register_frame(void*);
-extern "C" void __deregister_frame(void*);
-
-#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050
-# define USE_KEYMGR 1
-#else
-# define USE_KEYMGR 0
-#endif
-
-#if USE_KEYMGR
-
-namespace {
-
-// LibgccObject - This is the structure defined in libgcc. There is no #include
-// provided for this structure, so we also define it here. libgcc calls it
-// "struct object". The structure is undocumented in libgcc.
-struct LibgccObject {
- void *unused1;
- void *unused2;
- void *unused3;
-
- /// frame - Pointer to the exception table.
- void *frame;
-
- /// encoding - The encoding of the object?
- union {
- struct {
- unsigned long sorted : 1;
- unsigned long from_array : 1;
- unsigned long mixed_encoding : 1;
- unsigned long encoding : 8;
- unsigned long count : 21;
- } b;
- size_t i;
- } encoding;
-
- /// fde_end - libgcc defines this field only if some macro is defined. We
- /// include this field even if it may not there, to make libgcc happy.
- char *fde_end;
-
- /// next - At least we know it's a chained list!
- struct LibgccObject *next;
-};
-
-// "kemgr" stuff. Apparently, all frame tables are stored there.
-extern "C" void _keymgr_set_and_unlock_processwide_ptr(int, void *);
-extern "C" void *_keymgr_get_and_lock_processwide_ptr(int);
-#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */
-
-/// LibgccObjectInfo - libgcc defines this struct as km_object_info. It
-/// probably contains all dwarf tables that are loaded.
-struct LibgccObjectInfo {
-
- /// seenObjects - LibgccObjects already parsed by the unwinding runtime.
- ///
- struct LibgccObject* seenObjects;
-
- /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime.
- ///
- struct LibgccObject* unseenObjects;
-
- unsigned unused[2];
-};
-
-/// darwin_register_frame - Since __register_frame does not work with darwin's
-/// libgcc,we provide our own function, which "tricks" libgcc by modifying the
-/// "Dwarf2 object list" key.
-void DarwinRegisterFrame(void* FrameBegin) {
- // Get the key.
- LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
- _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
- assert(LOI && "This should be preallocated by the runtime");
-
- // Allocate a new LibgccObject to represent this frame. Deallocation of this
- // object may be impossible: since darwin code in libgcc was written after
- // the ability to dynamically register frames, things may crash if we
- // deallocate it.
- struct LibgccObject* ob = (struct LibgccObject*)
- malloc(sizeof(struct LibgccObject));
-
- // Do like libgcc for the values of the field.
- ob->unused1 = (void *)-1;
- ob->unused2 = 0;
- ob->unused3 = 0;
- ob->frame = FrameBegin;
- ob->encoding.i = 0;
- ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit;
-
- // Put the info on both places, as libgcc uses the first or the second
- // field. Note that we rely on having two pointers here. If fde_end was a
- // char, things would get complicated.
- ob->fde_end = (char*)LOI->unseenObjects;
- ob->next = LOI->unseenObjects;
-
- // Update the key's unseenObjects list.
- LOI->unseenObjects = ob;
-
- // Finally update the "key". Apparently, libgcc requires it.
- _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST,
- LOI);
-
-}
-
-}
-#endif // __APPLE__
-#endif // HAVE_EHTABLE_SUPPORT
-
/// createJIT - This is the factory method for creating a JIT for the current
/// machine, it does not fall back to the interpreter. This takes ownership
/// of the module.
@@ -293,33 +159,11 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
report_fatal_error("Target does not support machine code emission!");
}
- // Register routine for informing unwinding runtime about new EH frames
-#if HAVE_EHTABLE_SUPPORT
-#if USE_KEYMGR
- struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
- _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
-
- // The key is created on demand, and libgcc creates it the first time an
- // exception occurs. Since we need the key to register frames, we create
- // it now.
- if (!LOI)
- LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1);
- _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
- InstallExceptionTableRegister(DarwinRegisterFrame);
- // Not sure about how to deregister on Darwin.
-#else
- InstallExceptionTableRegister(__register_frame);
- InstallExceptionTableDeregister(__deregister_frame);
-#endif // __APPLE__
-#endif // HAVE_EHTABLE_SUPPORT
-
// Initialize passes.
PM.doInitialization();
}
JIT::~JIT() {
- // Unregister all exception tables registered by this JIT.
- DeregisterAllTables();
// Cleanup.
AllJits->Remove(this);
delete jitstate;
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 94db245..f58d31b 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -464,7 +464,7 @@ namespace {
/// allocateCodeSection - Allocate memory for a code section.
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID, StringRef SectionName) {
// Grow the required block size to account for the block header
Size += sizeof(*CurBlock);
@@ -510,7 +510,8 @@ namespace {
/// allocateDataSection - Allocate memory for a data section.
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, bool IsReadOnly) {
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) {
return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
}
@@ -793,7 +794,7 @@ static void runAtExitHandlers() {
// not inlined, and hiding their real definitions in a separate archive file
// that the dynamic linker can't see. For more info, search for
// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
+#if defined(__linux__) && defined(__GLIBC__)
/* stat functions are redirecting to __xstat with a version number. On x86-64
* linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
* available as an exported symbol, so we have to add it explicitly.
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 09dd924..195c458 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -14,10 +14,12 @@
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/ObjectBuffer.h"
#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/PassManager.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
@@ -53,37 +55,63 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM,
bool AllocateGVsWithCode)
- : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM),
- IsLoaded(false), M(m), ObjCache(0) {
+ : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(this, MM), Dyld(&MemMgr),
+ ObjCache(0) {
+ OwnedModules.addModule(m);
setDataLayout(TM->getDataLayout());
}
MCJIT::~MCJIT() {
- if (LoadedObject)
- NotifyFreeingObject(*LoadedObject.get());
- delete MemMgr;
+ MutexGuard locked(lock);
+ // FIXME: We are managing our modules, so we do not want the base class
+ // ExecutionEngine to manage them as well. To avoid double destruction
+ // of the first (and only) module added in ExecutionEngine constructor
+ // we remove it from EE and will destruct it ourselves.
+ //
+  // It may make sense to move our module manager (based on SmallPtrSet) back
+ // into EE if the JIT and Interpreter can live with it.
+ // If so, additional functions: addModule, removeModule, FindFunctionNamed,
+ // runStaticConstructorsDestructors could be moved back to EE as well.
+ //
+ Modules.clear();
+ Dyld.deregisterEHFrames();
+
+ LoadedObjectMap::iterator it, end = LoadedObjects.end();
+ for (it = LoadedObjects.begin(); it != end; ++it) {
+ ObjectImage *Obj = it->second;
+ if (Obj) {
+ NotifyFreeingObject(*Obj);
+ delete Obj;
+ }
+ }
+ LoadedObjects.clear();
delete TM;
}
+void MCJIT::addModule(Module *M) {
+ MutexGuard locked(lock);
+ OwnedModules.addModule(M);
+}
+
+bool MCJIT::removeModule(Module *M) {
+ MutexGuard locked(lock);
+ return OwnedModules.removeModule(M);
+}
+
+
+
void MCJIT::setObjectCache(ObjectCache* NewCache) {
+ MutexGuard locked(lock);
ObjCache = NewCache;
}
-ObjectBufferStream* MCJIT::emitObject(Module *m) {
- /// Currently, MCJIT only supports a single module and the module passed to
- /// this function call is expected to be the contained module. The module
- /// is passed as a parameter here to prepare for multiple module support in
- /// the future.
- assert(M == m);
-
- // Get a thread lock to make sure we aren't trying to compile multiple times
+ObjectBufferStream* MCJIT::emitObject(Module *M) {
MutexGuard locked(lock);
- // FIXME: Track compilation state on a per-module basis when multiple modules
- // are supported.
- // Re-compilation is not supported
- assert(!IsLoaded);
+ // This must be a module which has already been added but not loaded to this
+ // MCJIT instance, since these conditions are tested by our caller,
+ // generateCodeForModule.
PassManager PM;
@@ -99,7 +127,7 @@ ObjectBufferStream* MCJIT::emitObject(Module *m) {
}
// Initialize passes.
- PM.run(*m);
+ PM.run(*M);
// Flush the output buffer to get the generated code into memory
CompiledObject->flush();
@@ -109,21 +137,22 @@ ObjectBufferStream* MCJIT::emitObject(Module *m) {
// MemoryBuffer is a thin wrapper around the actual memory, so it's OK
// to create a temporary object here and delete it after the call.
OwningPtr<MemoryBuffer> MB(CompiledObject->getMemBuffer());
- ObjCache->notifyObjectCompiled(m, MB.get());
+ ObjCache->notifyObjectCompiled(M, MB.get());
}
return CompiledObject.take();
}
-void MCJIT::loadObject(Module *M) {
-
+void MCJIT::generateCodeForModule(Module *M) {
// Get a thread lock to make sure we aren't trying to load multiple times
MutexGuard locked(lock);
- // FIXME: Track compilation state on a per-module basis when multiple modules
- // are supported.
+ // This must be a module which has already been added to this MCJIT instance.
+ assert(OwnedModules.ownsModule(M) &&
+ "MCJIT::generateCodeForModule: Unknown module.");
+
// Re-compilation is not supported
- if (IsLoaded)
+ if (OwnedModules.hasModuleBeenLoaded(M))
return;
OwningPtr<ObjectBuffer> ObjectToLoad;
@@ -141,59 +170,137 @@ void MCJIT::loadObject(Module *M) {
}
// Load the object into the dynamic linker.
- // handing off ownership of the buffer
- LoadedObject.reset(Dyld.loadObject(ObjectToLoad.take()));
+ // MCJIT now owns the ObjectImage pointer (via its LoadedObjects map).
+ ObjectImage *LoadedObject = Dyld.loadObject(ObjectToLoad.take());
+ LoadedObjects[M] = LoadedObject;
if (!LoadedObject)
report_fatal_error(Dyld.getErrorString());
- // Resolve any relocations.
- Dyld.resolveRelocations();
-
// FIXME: Make this optional, maybe even move it to a JIT event listener
LoadedObject->registerWithDebugger();
NotifyObjectEmitted(*LoadedObject);
- // FIXME: Add support for per-module compilation state
- IsLoaded = true;
+ OwnedModules.markModuleAsLoaded(M);
}
-// FIXME: Add a parameter to identify which object is being finalized when
-// MCJIT supports multiple modules.
-// FIXME: Provide a way to separate code emission, relocations and page
-// protection in the interface.
+void MCJIT::finalizeLoadedModules() {
+ MutexGuard locked(lock);
+
+ // Resolve any outstanding relocations.
+ Dyld.resolveRelocations();
+
+ OwnedModules.markAllLoadedModulesAsFinalized();
+
+ // Register EH frame data for any module we own which has been loaded
+ Dyld.registerEHFrames();
+
+ // Set page permissions.
+ MemMgr.finalizeMemory();
+}
+
+// FIXME: Rename this.
void MCJIT::finalizeObject() {
- // If the module hasn't been compiled, just do that.
- if (!IsLoaded) {
- // If the call to Dyld.resolveRelocations() is removed from loadObject()
- // we'll need to do that here.
- loadObject(M);
- } else {
- // Resolve any relocations.
- Dyld.resolveRelocations();
+ MutexGuard locked(lock);
+
+ for (ModulePtrSet::iterator I = OwnedModules.begin_added(),
+ E = OwnedModules.end_added();
+ I != E; ++I) {
+ Module *M = *I;
+ generateCodeForModule(M);
}
- StringRef EHData = Dyld.getEHFrameSection();
- if (!EHData.empty())
- MemMgr->registerEHFrames(EHData);
+ finalizeLoadedModules();
+}
- // Set page permissions.
- MemMgr->finalizeMemory();
+void MCJIT::finalizeModule(Module *M) {
+ MutexGuard locked(lock);
+
+ // This must be a module which has already been added to this MCJIT instance.
+ assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module.");
+
+ // If the module hasn't been compiled, just do that.
+ if (!OwnedModules.hasModuleBeenLoaded(M))
+ generateCodeForModule(M);
+
+ finalizeLoadedModules();
}
void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
report_fatal_error("not yet implemented");
}
-void *MCJIT::getPointerToFunction(Function *F) {
- // FIXME: This should really return a uint64_t since it's a pointer in the
- // target address space, not our local address space. That's part of the
- // ExecutionEngine interface, though. Fix that when the old JIT finally
- // dies.
+uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) {
+ // Check with the RuntimeDyld to see if we already have this symbol.
+ if (Name[0] == '\1')
+ return Dyld.getSymbolLoadAddress(Name.substr(1));
+ return Dyld.getSymbolLoadAddress((TM->getMCAsmInfo()->getGlobalPrefix()
+ + Name));
+}
+
+Module *MCJIT::findModuleForSymbol(const std::string &Name,
+ bool CheckFunctionsOnly) {
+ MutexGuard locked(lock);
+
+ // If it hasn't already been generated, see if it's in one of our modules.
+ for (ModulePtrSet::iterator I = OwnedModules.begin_added(),
+ E = OwnedModules.end_added();
+ I != E; ++I) {
+ Module *M = *I;
+ Function *F = M->getFunction(Name);
+ if (F && !F->isDeclaration())
+ return M;
+ if (!CheckFunctionsOnly) {
+ GlobalVariable *G = M->getGlobalVariable(Name);
+ if (G && !G->isDeclaration())
+ return M;
+ // FIXME: Do we need to worry about global aliases?
+ }
+ }
+ // We didn't find the symbol in any of our modules.
+ return NULL;
+}
+
+uint64_t MCJIT::getSymbolAddress(const std::string &Name,
+ bool CheckFunctionsOnly)
+{
+ MutexGuard locked(lock);
+
+ // First, check to see if we already have this symbol.
+ uint64_t Addr = getExistingSymbolAddress(Name);
+ if (Addr)
+ return Addr;
+
+ // If it hasn't already been generated, see if it's in one of our modules.
+ Module *M = findModuleForSymbol(Name, CheckFunctionsOnly);
+ if (!M)
+ return 0;
+
+ generateCodeForModule(M);
+
+ // Check the RuntimeDyld table again, it should be there now.
+ return getExistingSymbolAddress(Name);
+}
- // FIXME: Add support for per-module compilation state
- if (!IsLoaded)
- loadObject(M);
+uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) {
+ MutexGuard locked(lock);
+ uint64_t Result = getSymbolAddress(Name, false);
+ if (Result != 0)
+ finalizeLoadedModules();
+ return Result;
+}
+
+uint64_t MCJIT::getFunctionAddress(const std::string &Name) {
+ MutexGuard locked(lock);
+ uint64_t Result = getSymbolAddress(Name, true);
+ if (Result != 0)
+ finalizeLoadedModules();
+ return Result;
+}
+
+// Deprecated. Use getFunctionAddress instead.
+void *MCJIT::getPointerToFunction(Function *F) {
+ MutexGuard locked(lock);
if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
bool AbortOnFailure = !F->hasExternalWeakLinkage();
@@ -202,6 +309,16 @@ void *MCJIT::getPointerToFunction(Function *F) {
return Addr;
}
+ Module *M = F->getParent();
+ bool HasBeenAddedButNotLoaded = OwnedModules.hasModuleBeenAddedButNotLoaded(M);
+
+ // Make sure the relevant module has been compiled and loaded.
+ if (HasBeenAddedButNotLoaded)
+ generateCodeForModule(M);
+ else if (!OwnedModules.hasModuleBeenLoaded(M))
+ // If this function doesn't belong to one of our modules, we're done.
+ return NULL;
+
// FIXME: Should the Dyld be retaining module information? Probably not.
// FIXME: Should we be using the mangler for this? Probably.
//
@@ -222,6 +339,45 @@ void MCJIT::freeMachineCodeForFunction(Function *F) {
report_fatal_error("not yet implemented");
}
+void MCJIT::runStaticConstructorsDestructorsInModulePtrSet(
+ bool isDtors, ModulePtrSet::iterator I, ModulePtrSet::iterator E) {
+ for (; I != E; ++I) {
+ ExecutionEngine::runStaticConstructorsDestructors(*I, isDtors);
+ }
+}
+
+void MCJIT::runStaticConstructorsDestructors(bool isDtors) {
+ // Execute global ctors/dtors for each module in the program.
+ runStaticConstructorsDestructorsInModulePtrSet(
+ isDtors, OwnedModules.begin_added(), OwnedModules.end_added());
+ runStaticConstructorsDestructorsInModulePtrSet(
+ isDtors, OwnedModules.begin_loaded(), OwnedModules.end_loaded());
+ runStaticConstructorsDestructorsInModulePtrSet(
+ isDtors, OwnedModules.begin_finalized(), OwnedModules.end_finalized());
+}
+
+Function *MCJIT::FindFunctionNamedInModulePtrSet(const char *FnName,
+ ModulePtrSet::iterator I,
+ ModulePtrSet::iterator E) {
+ for (; I != E; ++I) {
+ if (Function *F = (*I)->getFunction(FnName))
+ return F;
+ }
+ return 0;
+}
+
+Function *MCJIT::FindFunctionNamed(const char *FnName) {
+ Function *F = FindFunctionNamedInModulePtrSet(
+ FnName, OwnedModules.begin_added(), OwnedModules.end_added());
+ if (!F)
+ F = FindFunctionNamedInModulePtrSet(FnName, OwnedModules.begin_loaded(),
+ OwnedModules.end_loaded());
+ if (!F)
+ F = FindFunctionNamedInModulePtrSet(FnName, OwnedModules.begin_finalized(),
+ OwnedModules.end_finalized());
+ return F;
+}
+
GenericValue MCJIT::runFunction(Function *F,
const std::vector<GenericValue> &ArgValues) {
assert(F && "Function *F was null at entry to run()");
@@ -324,12 +480,8 @@ GenericValue MCJIT::runFunction(Function *F,
void *MCJIT::getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure) {
- // FIXME: Add support for per-module compilation state
- if (!IsLoaded)
- loadObject(M);
-
- if (!isSymbolSearchingDisabled() && MemMgr) {
- void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
+ if (!isSymbolSearchingDisabled()) {
+ void *ptr = MemMgr.getPointerToNamedFunction(Name, false);
if (ptr)
return ptr;
}
@@ -365,6 +517,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
}
void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) {
MutexGuard locked(lock);
+ MemMgr.notifyObjectLoaded(this, &Obj);
for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
EventListeners[I]->NotifyObjectEmitted(Obj);
}
@@ -375,3 +528,14 @@ void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) {
EventListeners[I]->NotifyFreeingObject(Obj);
}
}
+
+uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) {
+ uint64_t Result = ParentEngine->getSymbolAddress(Name, false);
+ // If the symbol wasn't found and it begins with an underscore, try again
+ // without the underscore.
+ if (!Result && Name[0] == '_')
+ Result = ParentEngine->getSymbolAddress(Name.substr(1), false);
+ if (Result)
+ return Result;
+ return ClientMM->getSymbolAddress(Name);
+}
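
One detail worth noting as an aside: the fallback above exists because IR-level names and assembler-level symbols differ on targets with a global prefix, for example Mach-O's leading underscore. A minimal sketch of the two directions, assuming a TargetMachine is at hand, mirroring getExistingSymbolAddress above:

#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <string>

// Sketch: getExistingSymbolAddress() prepends the prefix before asking
// RuntimeDyld; the fallback above strips a leading '_' when a pre-prefixed
// name fails to resolve against the engine's own modules.
static std::string irNameToSymbol(const llvm::TargetMachine &TM,
                                  const std::string &IRName) {
  return TM.getMCAsmInfo()->getGlobalPrefix() + IRName;  // e.g. foo -> _foo
}

static std::string symbolToIRName(const std::string &Sym) {
  return (!Sym.empty() && Sym[0] == '_') ? Sym.substr(1) : Sym;
}
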
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index a899d4f..86b478b 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -10,48 +10,235 @@
#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/Module.h"
namespace llvm {
+class MCJIT;
-class ObjectImage;
+// This is a helper class that the MCJIT execution engine uses for linking
+// functions across modules that it owns. It aggregates the memory manager
+// that is passed in to the MCJIT constructor and defers most functionality
+// to that object.
+class LinkingMemoryManager : public RTDyldMemoryManager {
+public:
+ LinkingMemoryManager(MCJIT *Parent, RTDyldMemoryManager *MM)
+ : ParentEngine(Parent), ClientMM(MM) {}
+
+ virtual uint64_t getSymbolAddress(const std::string &Name);
+
+ // Functions deferred to client memory manager
+ virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName) {
+ return ClientMM->allocateCodeSection(Size, Alignment, SectionID, SectionName);
+ }
+
+ virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) {
+ return ClientMM->allocateDataSection(Size, Alignment,
+ SectionID, SectionName, IsReadOnly);
+ }
+
+ virtual void notifyObjectLoaded(ExecutionEngine *EE,
+ const ObjectImage *Obj) {
+ ClientMM->notifyObjectLoaded(EE, Obj);
+ }
+
+ virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
+ ClientMM->registerEHFrames(Addr, LoadAddr, Size);
+ }
+
+ virtual void deregisterEHFrames(uint8_t *Addr,
+ uint64_t LoadAddr,
+ size_t Size) {
+ ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
+ }
+
+ virtual bool finalizeMemory(std::string *ErrMsg = 0) {
+ return ClientMM->finalizeMemory(ErrMsg);
+ }
-// FIXME: This makes all kinds of horrible assumptions for the time being,
-// like only having one module, not needing to worry about multi-threading,
-// blah blah. Purely in get-it-up-and-limping mode for now.
+private:
+ MCJIT *ParentEngine;
+ OwningPtr<RTDyldMemoryManager> ClientMM;
+};
+
+// About Module states: added->loaded->finalized.
+//
+// The purpose of the "added" state is to have modules on standby (added=known
+// but not compiled). The idea is that you can add a module to provide function
+// definitions, but if nothing in that module is ever referenced from a module
+// in which code is executed, the added module never gets compiled. This is a
+// form of lazy compilation.
+//
+// The purpose of the "loaded" state (loaded=compiled and required sections
+// copied into local memory, but not yet ready for execution) is to provide an
+// intermediate point at which clients can remap section addresses with
+// MCJIT::mapSectionAddress (in preparation for later copying to a new location
+// or to an external process) before relocations and page permissions are applied.
+//
+// It might not be obvious at first glance, but the "remote-mcjit" case in the
+// lli tool does this. In that case, the intermediate action is taken by the
+// RemoteMemoryManager in response to the notifyObjectLoaded function being
+// called.
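
A sketch of the remap workflow this state exists for (illustration only, not part of the patch), assuming a module owned by the engine and client-chosen section addresses, and using the MCJIT entry points declared below:

#include "MCJIT.h"
#include <stdint.h>

// Sketch: JIT owns M; LocalAddr is the host-side address of a section reported
// via notifyObjectLoaded, and TargetAddr is where the client wants it to run
// (for example in a remote process). Both addresses are hypothetical.
static void loadRemapFinalize(llvm::MCJIT &JIT, llvm::Module *M,
                              const void *LocalAddr, uint64_t TargetAddr) {
  JIT.generateCodeForModule(M);                  // added -> loaded
  JIT.mapSectionAddress(LocalAddr, TargetAddr);  // remap before relocation
  JIT.finalizeModule(M);                         // apply relocations and page
                                                 // permissions: loaded -> finalized
}
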
class MCJIT : public ExecutionEngine {
MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr,
bool AllocateGVsWithCode);
+ typedef llvm::SmallPtrSet<Module *, 4> ModulePtrSet;
+
+ class OwningModuleContainer {
+ public:
+ OwningModuleContainer() {
+ }
+ ~OwningModuleContainer() {
+ freeModulePtrSet(AddedModules);
+ freeModulePtrSet(LoadedModules);
+ freeModulePtrSet(FinalizedModules);
+ }
+
+ ModulePtrSet::iterator begin_added() { return AddedModules.begin(); }
+ ModulePtrSet::iterator end_added() { return AddedModules.end(); }
+
+ ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); }
+ ModulePtrSet::iterator end_loaded() { return LoadedModules.end(); }
+
+ ModulePtrSet::iterator begin_finalized() { return FinalizedModules.begin(); }
+ ModulePtrSet::iterator end_finalized() { return FinalizedModules.end(); }
+
+ void addModule(Module *M) {
+ AddedModules.insert(M);
+ }
+
+ bool removeModule(Module *M) {
+ return AddedModules.erase(M) || LoadedModules.erase(M) ||
+ FinalizedModules.erase(M);
+ }
+
+ bool hasModuleBeenAddedButNotLoaded(Module *M) {
+ return AddedModules.count(M) != 0;
+ }
+
+ bool hasModuleBeenLoaded(Module *M) {
+ // If the module is in either the "loaded" or "finalized" set, it
+ // has been loaded.
+ return (LoadedModules.count(M) != 0) || (FinalizedModules.count(M) != 0);
+ }
+
+ bool hasModuleBeenFinalized(Module *M) {
+ return FinalizedModules.count(M) != 0;
+ }
+
+ bool ownsModule(Module* M) {
+ return (AddedModules.count(M) != 0) || (LoadedModules.count(M) != 0) ||
+ (FinalizedModules.count(M) != 0);
+ }
+
+ void markModuleAsLoaded(Module *M) {
+ // This checks against logic errors in the MCJIT implementation.
+ // This function should never be called with either a Module that MCJIT
+ // does not own or a Module that has already been loaded and/or finalized.
+ assert(AddedModules.count(M) &&
+ "markModuleAsLoaded: Module not found in AddedModules");
+
+ // Remove the module from the "Added" set.
+ AddedModules.erase(M);
+
+ // Add the Module to the "Loaded" set.
+ LoadedModules.insert(M);
+ }
+
+ void markModuleAsFinalized(Module *M) {
+ // This checks against logic errors in the MCJIT implementation.
+ // This function should never be called with either a Module that MCJIT
+ // does not own, a Module that has not been loaded or a Module that has
+ // already been finalized.
+ assert(LoadedModules.count(M) &&
+ "markModuleAsFinalized: Module not found in LoadedModules");
+
+ // Remove the module from the "Loaded" set.
+ LoadedModules.erase(M);
+
+ // Add the Module to the "Finalized" set.
+ FinalizedModules.insert(M);
+ }
+
+ void markAllLoadedModulesAsFinalized() {
+ for (ModulePtrSet::iterator I = LoadedModules.begin(),
+ E = LoadedModules.end();
+ I != E; ++I) {
+ Module *M = *I;
+ FinalizedModules.insert(M);
+ }
+ LoadedModules.clear();
+ }
+
+ private:
+ ModulePtrSet AddedModules;
+ ModulePtrSet LoadedModules;
+ ModulePtrSet FinalizedModules;
+
+ void freeModulePtrSet(ModulePtrSet& MPS) {
+ // Go through the module set and delete everything.
+ for (ModulePtrSet::iterator I = MPS.begin(), E = MPS.end(); I != E; ++I) {
+ Module *M = *I;
+ delete M;
+ }
+ MPS.clear();
+ }
+ };
+
TargetMachine *TM;
MCContext *Ctx;
- RTDyldMemoryManager *MemMgr;
+ LinkingMemoryManager MemMgr;
RuntimeDyld Dyld;
SmallVector<JITEventListener*, 2> EventListeners;
- // FIXME: Add support for multiple modules
- bool IsLoaded;
- Module *M;
- OwningPtr<ObjectImage> LoadedObject;
+ OwningModuleContainer OwnedModules;
+
+ typedef DenseMap<Module *, ObjectImage *> LoadedObjectMap;
+ LoadedObjectMap LoadedObjects;
// An optional ObjectCache to be notified of compiled objects and used to
// perform lookup of pre-compiled code to avoid re-compilation.
ObjectCache *ObjCache;
+ Function *FindFunctionNamedInModulePtrSet(const char *FnName,
+ ModulePtrSet::iterator I,
+ ModulePtrSet::iterator E);
+
+ void runStaticConstructorsDestructorsInModulePtrSet(bool isDtors,
+ ModulePtrSet::iterator I,
+ ModulePtrSet::iterator E);
+
public:
~MCJIT();
/// @name ExecutionEngine interface implementation
/// @{
+ virtual void addModule(Module *M);
+ virtual bool removeModule(Module *M);
+
+ /// FindFunctionNamed - Search all of the active modules to find the one that
+ /// defines FnName. This is a very slow operation and shouldn't be used for
+ /// general code.
+ virtual Function *FindFunctionNamed(const char *FnName);
/// Sets the object manager that MCJIT should use to avoid compilation.
virtual void setObjectCache(ObjectCache *manager);
+ virtual void generateCodeForModule(Module *M);
+
/// finalizeObject - ensure the module is fully processed and is usable.
///
/// It is the user-level function for completing the process of making the
@@ -59,7 +246,17 @@ public:
/// object have been relocated using mapSectionAddress. When this method is
/// called the MCJIT execution engine will reapply relocations for a loaded
/// object.
+ /// It is OK to finalize a set of modules, add more modules and finalize again.
+ // FIXME: Do we really need both of these?
virtual void finalizeObject();
+ virtual void finalizeModule(Module *);
+ void finalizeLoadedModules();
+
+ /// runStaticConstructorsDestructors - This method is used to execute all of
+ /// the static constructors or destructors for a program.
+ ///
+ /// \param isDtors - Run the destructors instead of constructors.
+ void runStaticConstructorsDestructors(bool isDtors);
virtual void *getPointerToBasicBlock(BasicBlock *BB);
@@ -91,10 +288,15 @@ public:
uint64_t TargetAddress) {
Dyld.mapSectionAddress(LocalAddress, TargetAddress);
}
-
virtual void RegisterJITEventListener(JITEventListener *L);
virtual void UnregisterJITEventListener(JITEventListener *L);
+ // If successful, these functions will implicitly finalize all loaded objects.
+ // To get a function address within MCJIT without causing a finalize, use
+ // getSymbolAddress.
+ virtual uint64_t getGlobalValueAddress(const std::string &Name);
+ virtual uint64_t getFunctionAddress(const std::string &Name);
+
/// @}
/// @name (Private) Registration Interfaces
/// @{
@@ -111,6 +313,11 @@ public:
// @}
+ // This is not directly exposed via the ExecutionEngine API, but it is
+ // used by the LinkingMemoryManager.
+ uint64_t getSymbolAddress(const std::string &Name,
+ bool CheckFunctionsOnly);
+
protected:
/// emitObject -- Generate a JITed object in memory from the specified module
/// Currently, MCJIT only supports a single module and the module passed to
@@ -119,10 +326,12 @@ protected:
/// the future.
ObjectBufferStream* emitObject(Module *M);
- void loadObject(Module *M);
-
void NotifyObjectEmitted(const ObjectImage& Obj);
void NotifyFreeingObject(const ObjectImage& Obj);
+
+ uint64_t getExistingSymbolAddress(const std::string &Name);
+ Module *findModuleForSymbol(const std::string &Name,
+ bool CheckFunctionsOnly);
};
} // End llvm namespace
diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
index 650832e..cf90e77 100644
--- a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -19,9 +19,10 @@
namespace llvm {
uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size,
- unsigned Alignment,
- unsigned SectionID,
- bool IsReadOnly) {
+ unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName,
+ bool IsReadOnly) {
if (IsReadOnly)
return allocateSection(RODataMem, Size, Alignment);
return allocateSection(RWDataMem, Size, Alignment);
@@ -29,7 +30,8 @@ uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size,
uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size,
unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID,
+ StringRef SectionName) {
return allocateSection(CodeMem, Size, Alignment);
}
@@ -105,6 +107,9 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg)
// FIXME: Should in-progress permissions be reverted if an error occurs?
error_code ec;
+ // Don't allow free memory blocks to be used after setting protection flags.
+ CodeMem.FreeMem.clear();
+
// Make code memory executable.
ec = applyMemoryGroupPermissions(CodeMem,
sys::Memory::MF_READ | sys::Memory::MF_EXEC);
@@ -115,6 +120,9 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg)
return true;
}
+ // Don't allow free memory blocks to be used after setting protection flags.
+ RODataMem.FreeMem.clear();
+
// Make read-only data memory read-only.
ec = applyMemoryGroupPermissions(RODataMem,
sys::Memory::MF_READ | sys::Memory::MF_EXEC);
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 38867ec..f11df82 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -20,7 +20,9 @@
#include "llvm/IR/Function.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
#include "llvm/ExecutionEngine/OProfileWrapper.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Errno.h"
@@ -52,6 +54,10 @@ public:
const JITEvent_EmittedFunctionDetails &Details);
virtual void NotifyFreeingMachineCode(void *OldPtr);
+
+ virtual void NotifyObjectEmitted(const ObjectImage &Obj);
+
+ virtual void NotifyFreeingObject(const ObjectImage &Obj);
};
void OProfileJITEventListener::initialize() {
@@ -159,6 +165,66 @@ void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
}
}
+void OProfileJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
+ if (!Wrapper.isAgentAvailable()) {
+ return;
+ }
+
+ // Use symbol info to iterate functions in the object.
+ error_code ec;
+ for (object::symbol_iterator I = Obj.begin_symbols(),
+ E = Obj.end_symbols();
+ I != E && !ec;
+ I.increment(ec)) {
+ object::SymbolRef::Type SymType;
+ if (I->getType(SymType)) continue;
+ if (SymType == object::SymbolRef::ST_Function) {
+ StringRef Name;
+ uint64_t Addr;
+ uint64_t Size;
+ if (I->getName(Name)) continue;
+ if (I->getAddress(Addr)) continue;
+ if (I->getSize(Size)) continue;
+
+ if (Wrapper.op_write_native_code(Name.data(), Addr, (void*)Addr, Size)
+ == -1) {
+ DEBUG(dbgs() << "Failed to tell OProfile about native function "
+ << Name << " at ["
+ << (void*)Addr << "-" << ((char*)Addr + Size) << "]\n");
+ continue;
+ }
+ // TODO: support line number info (similar to IntelJITEventListener.cpp)
+ }
+ }
+}
+
+void OProfileJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) {
+ if (!Wrapper.isAgentAvailable()) {
+ return;
+ }
+
+ // Use symbol info to iterate functions in the object.
+ error_code ec;
+ for (object::symbol_iterator I = Obj.begin_symbols(),
+ E = Obj.end_symbols();
+ I != E && !ec;
+ I.increment(ec)) {
+ object::SymbolRef::Type SymType;
+ if (I->getType(SymType)) continue;
+ if (SymType == object::SymbolRef::ST_Function) {
+ uint64_t Addr;
+ if (I->getAddress(Addr)) continue;
+
+ if (Wrapper.op_unload_native_code(Addr) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about unload of native function at "
+ << (void*)Addr << "\n");
+ continue;
+ }
+ }
+ }
+}
+
} // anonymous namespace.
namespace llvm {
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
index 7c0d395..61d8dc2 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -13,22 +13,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ExecutionEngine/OProfileWrapper.h"
-
#define DEBUG_TYPE "oprofile-wrapper"
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/ADT/SmallString.h"
-
-#include <sstream>
#include <cstring>
-#include <stddef.h>
#include <dirent.h>
-#include <sys/stat.h>
#include <fcntl.h>
+#include <sstream>
+#include <stddef.h>
+#include <sys/stat.h>
#include <unistd.h>
namespace {
@@ -143,6 +141,10 @@ bool OProfileWrapper::checkForOProfileProcEntry() {
close(CmdLineFD);
ssize_t Idx = 0;
+ if (ExeName[0] != '/') {
+ BaseName = ExeName;
+ }
+
// Find the terminator for the first string
while (Idx < NumRead-1 && ExeName[Idx] != 0) {
Idx++;
@@ -161,7 +163,8 @@ bool OProfileWrapper::checkForOProfileProcEntry() {
}
// Test this to see if it is the oprofile daemon
- if (BaseName != 0 && !strcmp("oprofiled", BaseName)) {
+ if (BaseName != 0 && (!strcmp("oprofiled", BaseName) ||
+ !strcmp("operf", BaseName))) {
// If it is, we're done
closedir(ProcDir);
return true;
diff --git a/lib/ExecutionEngine/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RTDyldMemoryManager.cpp
index 4e76457..26e1fdd 100644
--- a/lib/ExecutionEngine/RTDyldMemoryManager.cpp
+++ b/lib/ExecutionEngine/RTDyldMemoryManager.cpp
@@ -33,8 +33,8 @@ namespace llvm {
RTDyldMemoryManager::~RTDyldMemoryManager() {}
// Determine whether we can register EH tables.
-#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
- !defined(__USING_SJLJ_EXCEPTIONS__))
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \
+ !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__))
#define HAVE_EHTABLE_SUPPORT 1
#else
#define HAVE_EHTABLE_SUPPORT 0
@@ -42,35 +42,180 @@ RTDyldMemoryManager::~RTDyldMemoryManager() {}
#if HAVE_EHTABLE_SUPPORT
extern "C" void __register_frame(void*);
+extern "C" void __deregister_frame(void*);
+#else
+// The building compiler does not have __(de)register_frame but
+// it may be found at runtime in a dynamically-loaded library.
+// For example, this happens when building LLVM with Visual C++
+// but using the MinGW runtime.
+void __register_frame(void *p) {
+ static bool Searched = false;
+ static void *rf = 0;
+
+ if (!Searched) {
+ Searched = true;
+ rf = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "__register_frame");
+ }
+ if (rf)
+ ((void (*)(void *))rf)(p);
+}
+
+void __deregister_frame(void *p) {
+ static bool Searched = false;
+ static void *df = 0;
+
+ if (!Searched) {
+ Searched = true;
+ df = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "__deregister_frame");
+ }
+ if (df)
+ ((void (*)(void *))df)(p);
+}
+#endif
+
+#ifdef __APPLE__
-static const char *processFDE(const char *Entry) {
+static const char *processFDE(const char *Entry, bool isDeregister) {
const char *P = Entry;
uint32_t Length = *((const uint32_t *)P);
P += 4;
uint32_t Offset = *((const uint32_t *)P);
- if (Offset != 0)
- __register_frame(const_cast<char *>(Entry));
+ if (Offset != 0) {
+ if (isDeregister)
+ __deregister_frame(const_cast<char *>(Entry));
+ else
+ __register_frame(const_cast<char *>(Entry));
+ }
return P + Length;
}
-#endif
-void RTDyldMemoryManager::registerEHFrames(StringRef SectionData) {
-#if HAVE_EHTABLE_SUPPORT
- const char *P = SectionData.data();
- const char *End = SectionData.data() + SectionData.size();
+// This implementation handles frame registration for local targets.
+// Memory managers for remote targets should re-implement this function
+// and use the LoadAddr parameter.
+void RTDyldMemoryManager::registerEHFrames(uint8_t *Addr,
+ uint64_t LoadAddr,
+ size_t Size) {
+ // On OS X, __register_frame takes a single FDE as an argument.
+ // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html
+ const char *P = (const char *)Addr;
+ const char *End = P + Size;
do {
- P = processFDE(P);
+ P = processFDE(P, false);
} while(P != End);
-#endif
}
+void RTDyldMemoryManager::deregisterEHFrames(uint8_t *Addr,
+ uint64_t LoadAddr,
+ size_t Size) {
+ const char *P = (const char *)Addr;
+ const char *End = P + Size;
+ do {
+ P = processFDE(P, true);
+ } while(P != End);
+}
+
+#else
+
+void RTDyldMemoryManager::registerEHFrames(uint8_t *Addr,
+ uint64_t LoadAddr,
+ size_t Size) {
+ // On Linux __register_frame takes a single argument:
+ // a pointer to the start of the .eh_frame section.
+
+ // How can it find the end? Because crtendS.o is linked
+ // in and it has an .eh_frame section that ends with four zero bytes.
+ __register_frame(Addr);
+}
+
+void RTDyldMemoryManager::deregisterEHFrames(uint8_t *Addr,
+ uint64_t LoadAddr,
+ size_t Size) {
+ __deregister_frame(Addr);
+}
+
+#endif
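
As the comment above the OS X variant notes, memory managers for remote targets are expected to override these hooks rather than register frames in the host process. A rough sketch of such an override built on SectionMemoryManager (illustration only; sendEHFrameToTarget is a hypothetical transport):

#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include <stdint.h>
#include <utility>
#include <vector>

// Sketch: instead of calling __register_frame locally, record the target-side
// (load address, size) pairs so they can be shipped to the remote process.
class RemoteEHFrameMemoryManager : public llvm::SectionMemoryManager {
public:
  virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
    // sendEHFrameToTarget(Addr, LoadAddr, Size);  // hypothetical transport
    PendingFrames.push_back(std::make_pair(LoadAddr, Size));
  }
  virtual void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
    // Ask the target to drop its registration; nothing to undo locally.
    (void)Addr; (void)LoadAddr; (void)Size;
  }
private:
  std::vector<std::pair<uint64_t, size_t> > PendingFrames;
};
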
+
static int jit_noop() {
return 0;
}
-void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
-#if defined(__linux__)
+// ARM math functions are statically linked on Android from libgcc.a, but not
+// available at runtime for dynamic linking. On Linux these are usually placed
+// in libgcc_s.so, so they can be found by normal dynamic lookup.
+#if defined(__BIONIC__) && defined(__arm__)
+// List of functions which are statically linked on Android and can be generated
+// by LLVM. This is done as a nested macro which is used once to declare the
+// imported functions with ARM_MATH_DECL and once to compare them to the
+// user-requested symbol in getSymbolAddress with ARM_MATH_CHECK. The test
+// assumes that all functions start with __aeabi_ and getSymbolAddress must be
+// modified if that changes.
+#define ARM_MATH_IMPORTS(PP) \
+ PP(__aeabi_d2f) \
+ PP(__aeabi_d2iz) \
+ PP(__aeabi_d2lz) \
+ PP(__aeabi_d2uiz) \
+ PP(__aeabi_d2ulz) \
+ PP(__aeabi_dadd) \
+ PP(__aeabi_dcmpeq) \
+ PP(__aeabi_dcmpge) \
+ PP(__aeabi_dcmpgt) \
+ PP(__aeabi_dcmple) \
+ PP(__aeabi_dcmplt) \
+ PP(__aeabi_dcmpun) \
+ PP(__aeabi_ddiv) \
+ PP(__aeabi_dmul) \
+ PP(__aeabi_dsub) \
+ PP(__aeabi_f2d) \
+ PP(__aeabi_f2iz) \
+ PP(__aeabi_f2lz) \
+ PP(__aeabi_f2uiz) \
+ PP(__aeabi_f2ulz) \
+ PP(__aeabi_fadd) \
+ PP(__aeabi_fcmpeq) \
+ PP(__aeabi_fcmpge) \
+ PP(__aeabi_fcmpgt) \
+ PP(__aeabi_fcmple) \
+ PP(__aeabi_fcmplt) \
+ PP(__aeabi_fcmpun) \
+ PP(__aeabi_fdiv) \
+ PP(__aeabi_fmul) \
+ PP(__aeabi_fsub) \
+ PP(__aeabi_i2d) \
+ PP(__aeabi_i2f) \
+ PP(__aeabi_idiv) \
+ PP(__aeabi_idivmod) \
+ PP(__aeabi_l2d) \
+ PP(__aeabi_l2f) \
+ PP(__aeabi_lasr) \
+ PP(__aeabi_ldivmod) \
+ PP(__aeabi_llsl) \
+ PP(__aeabi_llsr) \
+ PP(__aeabi_lmul) \
+ PP(__aeabi_ui2d) \
+ PP(__aeabi_ui2f) \
+ PP(__aeabi_uidiv) \
+ PP(__aeabi_uidivmod) \
+ PP(__aeabi_ul2d) \
+ PP(__aeabi_ul2f) \
+ PP(__aeabi_uldivmod)
+
+// Declare statically linked math functions on ARM. The function declarations
+// here do not have the correct prototypes for each function in
+// ARM_MATH_IMPORTS, but it doesn't matter because only the symbol addresses are
+// needed. In particular the __aeabi_*divmod functions do not have calling
+// conventions which match any C prototype.
+#define ARM_MATH_DECL(name) extern "C" void name();
+ARM_MATH_IMPORTS(ARM_MATH_DECL)
+#undef ARM_MATH_DECL
+#endif
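
ARM_MATH_IMPORTS is an X-macro: one list of names, expanded once to declare the symbols and once to match the requested name. A tiny self-contained illustration of the technique (the EXAMPLE_* names are hypothetical; in the real list the symbols are provided by libgcc.a at link time):

#include <string.h>
#include <stdint.h>

// One list of names, used by several expansions.
#define EXAMPLE_IMPORTS(PP) \
  PP(example_add)           \
  PP(example_sub)

// Expansion 1: declare each listed symbol (prototypes don't matter, only the
// addresses do, exactly as with ARM_MATH_DECL).
#define EXAMPLE_DECL(name) extern "C" void name();
EXAMPLE_IMPORTS(EXAMPLE_DECL)
#undef EXAMPLE_DECL

// Expansion 2: compare a requested name against every listed symbol, as
// ARM_MATH_CHECK does inside getSymbolAddress.
static uint64_t lookupExample(const char *Name) {
#define EXAMPLE_CHECK(fn) if (!strcmp(Name, #fn)) return (uint64_t)&fn;
  EXAMPLE_IMPORTS(EXAMPLE_CHECK)
#undef EXAMPLE_CHECK
  return 0;
}
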
+
+uint64_t RTDyldMemoryManager::getSymbolAddress(const std::string &Name) {
+ // This implementation assumes that the host program is the target.
+ // Clients generating code for a remote target should implement their own
+ // memory manager.
+#if defined(__linux__) && defined(__GLIBC__)
//===--------------------------------------------------------------------===//
// Function stubs that are invoked instead of certain library calls
//
@@ -80,15 +225,26 @@ void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name,
// not inlined, and hiding their real definitions in a separate archive file
// that the dynamic linker can't see. For more info, search for
// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
- if (Name == "stat") return (void*)(intptr_t)&stat;
- if (Name == "fstat") return (void*)(intptr_t)&fstat;
- if (Name == "lstat") return (void*)(intptr_t)&lstat;
- if (Name == "stat64") return (void*)(intptr_t)&stat64;
- if (Name == "fstat64") return (void*)(intptr_t)&fstat64;
- if (Name == "lstat64") return (void*)(intptr_t)&lstat64;
- if (Name == "atexit") return (void*)(intptr_t)&atexit;
- if (Name == "mknod") return (void*)(intptr_t)&mknod;
-#endif // __linux__
+ if (Name == "stat") return (uint64_t)&stat;
+ if (Name == "fstat") return (uint64_t)&fstat;
+ if (Name == "lstat") return (uint64_t)&lstat;
+ if (Name == "stat64") return (uint64_t)&stat64;
+ if (Name == "fstat64") return (uint64_t)&fstat64;
+ if (Name == "lstat64") return (uint64_t)&lstat64;
+ if (Name == "atexit") return (uint64_t)&atexit;
+ if (Name == "mknod") return (uint64_t)&mknod;
+#endif // __linux__ && __GLIBC__
+
+ // See ARM_MATH_IMPORTS definition for explanation
+#if defined(__BIONIC__) && defined(__arm__)
+ if (Name.compare(0, 8, "__aeabi_") == 0) {
+ // Check if the user has requested any of the functions listed in
+ // ARM_MATH_IMPORTS, and if so redirect to the statically linked symbol.
+#define ARM_MATH_CHECK(fn) if (Name == #fn) return (uint64_t)&fn;
+ ARM_MATH_IMPORTS(ARM_MATH_CHECK)
+#undef ARM_MATH_CHECK
+ }
+#endif
// We should not invoke parent's ctors/dtors from generated main()!
// On Mingw and Cygwin, the symbol __main is resolved to
@@ -96,23 +252,31 @@ void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name,
// (and register wrong callee's dtors with atexit(3)).
// We expect ExecutionEngine::runStaticConstructorsDestructors()
// is called before ExecutionEngine::runFunctionAsMain() is called.
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+ if (Name == "__main") return (uint64_t)&jit_noop;
const char *NameStr = Name.c_str();
void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
+ if (Ptr)
+ return (uint64_t)Ptr;
// If it wasn't found and if it starts with an underscore ('_') character,
// try again without the underscore.
if (NameStr[0] == '_') {
Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
+ if (Ptr)
+ return (uint64_t)Ptr;
}
+ return 0;
+}
- if (AbortOnFailure)
+void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ uint64_t Addr = getSymbolAddress(Name);
+
+ if (!Addr && AbortOnFailure)
report_fatal_error("Program used external function '" + Name +
"' which could not be resolved!");
- return 0;
+ return (void*)Addr;
}
} // namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
index 69e9dbe..6a514ea 100644
--- a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
+++ b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
@@ -16,6 +16,7 @@ namespace llvm {
/// Global access point for the JIT debugging interface.
class JITRegistrar {
+ virtual void anchor();
public:
/// Instantiates the JIT service.
JITRegistrar() {}
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
index 89350cc..9cbde5d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -23,6 +23,7 @@ namespace llvm {
class ObjectImageCommon : public ObjectImage {
ObjectImageCommon(); // = delete
ObjectImageCommon(const ObjectImageCommon &other); // = delete
+ virtual void anchor();
protected:
object::ObjectFile *ObjFile;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 943622f..161135a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -13,12 +13,14 @@
#define DEBUG_TYPE "dyld"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "JITRegistrar.h"
#include "ObjectImageCommon.h"
#include "RuntimeDyldELF.h"
#include "RuntimeDyldImpl.h"
#include "RuntimeDyldMachO.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MutexGuard.h"
#include "llvm/Object/ELF.h"
using namespace llvm;
@@ -27,30 +29,44 @@ using namespace llvm::object;
// Empty out-of-line virtual destructor as the key function.
RuntimeDyldImpl::~RuntimeDyldImpl() {}
+// Pin the JITRegistrar's and ObjectImage*'s vtables to this file.
+void JITRegistrar::anchor() {}
+void ObjectImage::anchor() {}
+void ObjectImageCommon::anchor() {}
+
namespace llvm {
-StringRef RuntimeDyldImpl::getEHFrameSection() {
- return StringRef();
+void RuntimeDyldImpl::registerEHFrames() {
+}
+
+void RuntimeDyldImpl::deregisterEHFrames() {
}
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
+ MutexGuard locked(lock);
+
// First, resolve relocations associated with external symbols.
resolveExternalSymbols();
// Just iterate over the sections we have and resolve all the relocations
// in them. Gross overkill, but it gets the job done.
for (int i = 0, e = Sections.size(); i != e; ++i) {
+ // The Section here (Sections[i]) refers to the section in which the
+ // symbol for the relocation is located. The SectionID in the relocation
+ // entry provides the section to which the relocation will be applied.
uint64_t Addr = Sections[i].LoadAddress;
DEBUG(dbgs() << "Resolving relocations Section #" << i
<< "\t" << format("%p", (uint8_t *)Addr)
<< "\n");
resolveRelocationList(Relocations[i], Addr);
+ Relocations.erase(i);
}
}
void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
uint64_t TargetAddress) {
+ MutexGuard locked(lock);
for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
if (Sections[i].Address == LocalAddress) {
reassignSectionAddress(i, TargetAddress);
@@ -67,11 +83,15 @@ ObjectImage *RuntimeDyldImpl::createObjectImage(ObjectBuffer *InputBuffer) {
}
ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
+ MutexGuard locked(lock);
+
OwningPtr<ObjectImage> obj(createObjectImage(InputBuffer));
if (!obj)
report_fatal_error("Unable to create object image from memory buffer!");
+ // Save information about our target
Arch = (Triple::ArchType)obj->getArch();
+ IsTargetLittleEndian = obj->getObjectFile()->isLittleEndian();
// Symbols found in this object
StringMap<SymbolLoc> LocalSymbols;
@@ -166,6 +186,9 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
}
}
+ // Give the subclasses a chance to tie up any loose ends.
+ finalizeLoad(LocalSections);
+
return obj.take();
}
@@ -175,8 +198,8 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
SymbolTableMap &SymbolTable) {
// Allocate memory for the section
unsigned SectionID = Sections.size();
- uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*),
- SectionID, false);
+ uint8_t *Addr = MemMgr->allocateDataSection(
+ TotalSize, sizeof(void*), SectionID, StringRef(), false);
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
@@ -247,6 +270,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
bool IsZeroInit;
bool IsReadOnly;
uint64_t DataSize;
+ unsigned PaddingSize = 0;
StringRef Name;
Check(Section.isRequiredForExecution(IsRequired));
Check(Section.isVirtual(IsVirtual));
@@ -261,6 +285,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
StubBufSize += StubAlignment - EndAlignment;
}
+ // The .eh_frame section (at least on Linux) needs an extra four bytes of
+ // zero padding at the end. For MachO objects, this section has a
+ // slightly different name, so this won't have any effect for MachO objects.
+ if (Name == ".eh_frame")
+ PaddingSize = 4;
+
unsigned Allocate;
unsigned SectionID = Sections.size();
uint8_t *Addr;
@@ -269,10 +299,11 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
// Some sections, such as debug info, don't need to be loaded for execution.
// Leave those where they are.
if (IsRequired) {
- Allocate = DataSize + StubBufSize;
+ Allocate = DataSize + PaddingSize + StubBufSize;
Addr = IsCode
- ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
- : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, IsReadOnly);
+ ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID, Name)
+ : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, Name,
+ IsReadOnly);
if (!Addr)
report_fatal_error("Unable to allocate section memory!");
@@ -286,6 +317,13 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
else
memcpy(Addr, pData, DataSize);
+ // Fill in any extra bytes we allocated for padding
+ if (PaddingSize != 0) {
+ memset(Addr + DataSize, 0, PaddingSize);
+ // Update the DataSize variable so that the stub offset is set correctly.
+ DataSize += PaddingSize;
+ }
+
DEBUG(dbgs() << "emitSection SectionID: " << SectionID
<< " Name: " << Name
<< " obj addr: " << format("%p", pData)
@@ -421,6 +459,10 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
writeInt16BE(Addr+6, 0x07F1); // brc 15,%r1
// 8-byte address stored at Addr + 8
return Addr;
+ } else if (Arch == Triple::x86_64) {
+ *Addr = 0xFF; // jmp
+ *(Addr+1) = 0x25; // rip
+ // 32-bit PC-relative address of the GOT entry will be stored at Addr+2
}
return Addr;
}
@@ -454,30 +496,52 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
}
void RuntimeDyldImpl::resolveExternalSymbols() {
- StringMap<RelocationList>::iterator i = ExternalSymbolRelocations.begin(),
- e = ExternalSymbolRelocations.end();
- for (; i != e; i++) {
+ while(!ExternalSymbolRelocations.empty()) {
+ StringMap<RelocationList>::iterator i = ExternalSymbolRelocations.begin();
+
StringRef Name = i->first();
- RelocationList &Relocs = i->second;
- SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name);
- if (Loc == GlobalSymbolTable.end()) {
- if (Name.size() == 0) {
- // This is an absolute symbol, use an address of zero.
- DEBUG(dbgs() << "Resolving absolute relocations." << "\n");
- resolveRelocationList(Relocs, 0);
+ if (Name.size() == 0) {
+ // This is an absolute symbol, use an address of zero.
+ DEBUG(dbgs() << "Resolving absolute relocations." << "\n");
+ RelocationList &Relocs = i->second;
+ resolveRelocationList(Relocs, 0);
+ } else {
+ uint64_t Addr = 0;
+ SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name);
+ if (Loc == GlobalSymbolTable.end()) {
+ // This is an external symbol, try to get its address from
+ // MemoryManager.
+ Addr = MemMgr->getSymbolAddress(Name.data());
+ // The call to getSymbolAddress may have caused additional modules to
+ // be loaded, which may have added new entries to the
+ // ExternalSymbolRelocations map. Consequently, we need to update our
+ // iterator. This is also why retrieval of the relocation list
+ // associated with this symbol is deferred until below this point.
+ // New entries may have been added to the relocation list.
+ i = ExternalSymbolRelocations.find(Name);
} else {
- // This is an external symbol, try to get its address from
- // MemoryManager.
- uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(),
- true);
- DEBUG(dbgs() << "Resolving relocations Name: " << Name
- << "\t" << format("%p", Addr)
- << "\n");
- resolveRelocationList(Relocs, (uintptr_t)Addr);
+ // We found the symbol in our global table. It was probably in a
+ // Module that we loaded previously.
+ SymbolLoc SymLoc = Loc->second;
+ Addr = getSectionLoadAddress(SymLoc.first) + SymLoc.second;
}
- } else {
- report_fatal_error("Expected external symbol");
+
+ // FIXME: Implement error handling that doesn't kill the host program!
+ if (!Addr)
+ report_fatal_error("Program used external function '" + Name +
+ "' which could not be resolved!");
+
+ updateGOTEntries(Name, Addr);
+ DEBUG(dbgs() << "Resolving relocations Name: " << Name
+ << "\t" << format("0x%lx", Addr)
+ << "\n");
+ // This list may have been updated when we called getSymbolAddress, so
+ // don't change this code to get the list earlier.
+ RelocationList &Relocs = i->second;
+ resolveRelocationList(Relocs, Addr);
}
+
+ ExternalSymbolRelocations.erase(i);
}
}
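
The shape of this loop is worth spelling out: the call into the memory manager can load further modules and grow (or, for the StringMap, rehash and thus invalidate) the relocation table, so the iterator and the relocation list are both re-fetched after the call. A stand-alone sketch of the same discipline with a plain std::map and a hypothetical resolve():

#include <map>
#include <string>
#include <vector>
#include <stdint.h>

typedef std::map<std::string, std::vector<uint64_t> > WorkMap;

// Hypothetical stand-in for MemMgr->getSymbolAddress(): resolving one name may
// discover new work and insert additional entries into the map.
static uint64_t resolve(const std::string &Name, WorkMap &Work) {
  if (Name == "uses_helper")
    Work["helper"];                       // a new entry appears as a side effect
  return 0x1000;
}

static void drain(WorkMap &Work) {
  while (!Work.empty()) {
    WorkMap::iterator I = Work.begin();
    std::string Name = I->first;

    uint64_t Addr = resolve(Name, Work);  // may grow Work; with a StringMap it
    I = Work.find(Name);                  // could also invalidate I, so re-find
                                          // before touching the entry's list
    std::vector<uint64_t> &Items = I->second;
    for (size_t i = 0; i != Items.size(); ++i)
      Items[i] = Addr;                    // stand-in for resolveRelocationList
    Work.erase(I);
  }
}
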
@@ -526,8 +590,10 @@ ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) {
case sys::fs::file_magic::bitcode:
case sys::fs::file_magic::archive:
case sys::fs::file_magic::coff_object:
+ case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
case sys::fs::file_magic::macho_universal_binary:
+ case sys::fs::file_magic::windows_resource:
report_fatal_error("Incompatible object format!");
}
} else {
@@ -539,10 +605,14 @@ ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) {
}
void *RuntimeDyld::getSymbolAddress(StringRef Name) {
+ if (!Dyld)
+ return NULL;
return Dyld->getSymbolAddress(Name);
}
uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) {
+ if (!Dyld)
+ return 0;
return Dyld->getSymbolLoadAddress(Name);
}
@@ -564,8 +634,14 @@ StringRef RuntimeDyld::getErrorString() {
return Dyld->getErrorString();
}
-StringRef RuntimeDyld::getEHFrameSection() {
- return Dyld->getEHFrameSection();
+void RuntimeDyld::registerEHFrames() {
+ if (Dyld)
+ Dyld->registerEHFrames();
+}
+
+void RuntimeDyld::deregisterEHFrames() {
+ if (Dyld)
+ Dyld->deregisterEHFrames();
}
} // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index cd99c3c..f2c69fc 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -22,7 +22,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/ObjectBuffer.h"
#include "llvm/ExecutionEngine/ObjectImage.h"
-#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -151,12 +151,31 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef,
namespace llvm {
-StringRef RuntimeDyldELF::getEHFrameSection() {
- for (int i = 0, e = Sections.size(); i != e; ++i) {
- if (Sections[i].Name == ".eh_frame")
- return StringRef((const char*)Sections[i].Address, Sections[i].Size);
+void RuntimeDyldELF::registerEHFrames() {
+ if (!MemMgr)
+ return;
+ for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) {
+ SID EHFrameSID = UnregisteredEHFrameSections[i];
+ uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
+ uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
+ size_t EHFrameSize = Sections[EHFrameSID].Size;
+ MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ RegisteredEHFrameSections.push_back(EHFrameSID);
}
- return StringRef();
+ UnregisteredEHFrameSections.clear();
+}
+
+void RuntimeDyldELF::deregisterEHFrames() {
+ if (!MemMgr)
+ return;
+ for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) {
+ SID EHFrameSID = RegisteredEHFrameSections[i];
+ uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
+ uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
+ size_t EHFrameSize = Sections[EHFrameSID].Size;
+ MemMgr->deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ }
+ RegisteredEHFrameSections.clear();
}
ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
@@ -202,7 +221,8 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
uint64_t Offset,
uint64_t Value,
uint32_t Type,
- int64_t Addend) {
+ int64_t Addend,
+ uint64_t SymOffset) {
switch (Type) {
default:
llvm_unreachable("Relocation type not implemented yet!");
@@ -227,6 +247,21 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
<< " at " << format("%p\n",Target));
break;
}
+ case ELF::R_X86_64_GOTPCREL: {
+ // findGOTEntry returns the 'G + GOT' part of the relocation calculation
+ // based on the load/target address of the GOT (not the current/local addr).
+ uint64_t GOTAddr = findGOTEntry(Value, SymOffset);
+ uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset);
+ uint64_t FinalAddress = Section.LoadAddress + Offset;
+ // The processRelocationRef method combines the symbol offset and the addend
+ // and in most cases that's what we want. For this relocation type, we need
+ // the raw addend, so we subtract the symbol offset to get it.
+ int64_t RealOffset = GOTAddr + Addend - SymOffset - FinalAddress;
+ assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
+ int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
+ *Target = TruncOffset;
+ break;
+ }
case ELF::R_X86_64_PC32: {
// Get the placeholder value from the generated object since
// a previous relocation attempt may have overwritten the loaded version
@@ -240,6 +275,16 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
*Target = TruncOffset;
break;
}
+ case ELF::R_X86_64_PC64: {
+ // Get the placeholder value from the generated object since
+ // a previous relocation attempt may have overwritten the loaded version
+ uint64_t *Placeholder = reinterpret_cast<uint64_t*>(Section.ObjAddress
+ + Offset);
+ uint64_t *Target = reinterpret_cast<uint64_t*>(Section.Address + Offset);
+ uint64_t FinalAddress = Section.LoadAddress + Offset;
+ *Target = *Placeholder + Value + Addend - FinalAddress;
+ break;
+ }
}
}
@@ -304,7 +349,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
}
case ELF::R_AARCH64_PREL32: {
uint64_t Result = Value + Addend - FinalAddress;
- assert(static_cast<int64_t>(Result) >= INT32_MIN &&
+ assert(static_cast<int64_t>(Result) >= INT32_MIN &&
static_cast<int64_t>(Result) <= UINT32_MAX);
*TargetPtr = static_cast<uint32_t>(Result & 0xffffffffU);
break;
@@ -316,7 +361,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
uint64_t BranchImm = Value + Addend - FinalAddress;
// "Check that -2^27 <= result < 2^27".
- assert(-(1LL << 27) <= static_cast<int64_t>(BranchImm) &&
+ assert(-(1LL << 27) <= static_cast<int64_t>(BranchImm) &&
static_cast<int64_t>(BranchImm) < (1LL << 27));
// AArch64 code is emitted with .rela relocations. The data already in any
@@ -341,7 +386,6 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
case ELF::R_AARCH64_MOVW_UABS_G2_NC: {
uint64_t Result = Value + Addend;
-
// AArch64 code is emitted with .rela relocations. The data already in any
// bits affected by the relocation on entry is garbage.
*TargetPtr &= 0xffe0001fU;
@@ -585,7 +629,7 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj,
// Finally compares the Symbol value and the target symbol offset
// to check if this .opd entry refers to the symbol the relocation
// points to.
- if (Rel.Addend != (intptr_t)TargetSymbolOffset)
+ if (Rel.Addend != (int64_t)TargetSymbolOffset)
continue;
section_iterator tsi(Obj.end_sections());
@@ -735,20 +779,42 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section,
}
}
+// The target location for the relocation is described by RE.SectionID and
+// RE.Offset. RE.SectionID can be used to find the SectionEntry. Each
+// SectionEntry has three members describing its location.
+// SectionEntry::Address is the address at which the section has been loaded
+// into memory in the current (host) process. SectionEntry::LoadAddress is the
+// address that the section will have in the target process.
+// SectionEntry::ObjAddress is the address of the bits for this section in the
+// original emitted object image (also in the current address space).
+//
+// Relocations will be applied as if the section were loaded at
+// SectionEntry::LoadAddress, but they will be applied at an address based
+// on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer to
+// Target memory contents if they are required for value calculations.
+//
+// The Value parameter here is the load address of the symbol for the
+// relocation to be applied. For relocations which refer to symbols in the
+// current object Value will be the LoadAddress of the section in which
+// the symbol resides (RE.Addend provides additional information about the
+// symbol location). For external symbols, Value will be the address of the
+// symbol in the target address space.
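
A worked sketch of that model with made-up numbers, for a simple PC-relative fixup such as R_X86_64_PC32: the bytes are patched through Section.Address (host memory), but the displacement is computed against Section.LoadAddress, so it is correct once the section runs at its target address.

#include <stdint.h>
#include <string.h>

// All addresses below are hypothetical.
static void workedPC32Example() {
  uint8_t SectionBytes[64] = {0};
  uint8_t *HostAddr = SectionBytes;          // SectionEntry::Address
  uint64_t TargetLoadAddr = 0x40001000ULL;   // SectionEntry::LoadAddress
  uint64_t Offset = 0x10;                    // RelocationEntry::Offset
  uint64_t Value = 0x40002000ULL;            // load address of the symbol
  int64_t Addend = -4;                       // RelocationEntry::Addend

  uint64_t FinalAddress = TargetLoadAddr + Offset;          // 0x40001010
  int32_t Disp = (int32_t)(Value + Addend - FinalAddress);  // 0xFEC
  memcpy(HostAddr + Offset, &Disp, sizeof(Disp));           // patch the host copy
}
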
void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE,
- uint64_t Value) {
+ uint64_t Value) {
const SectionEntry &Section = Sections[RE.SectionID];
- return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend);
+ return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend,
+ RE.SymOffset);
}
void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
uint64_t Offset,
uint64_t Value,
uint32_t Type,
- int64_t Addend) {
+ int64_t Addend,
+ uint64_t SymOffset) {
switch (Arch) {
case Triple::x86_64:
- resolveX86_64Relocation(Section, Offset, Value, Type, Addend);
+ resolveX86_64Relocation(Section, Offset, Value, Type, Addend, SymOffset);
break;
case Triple::x86:
resolveX86Relocation(Section, Offset,
@@ -811,6 +877,7 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
}
if (lsi != Symbols.end()) {
Value.SectionID = lsi->second.first;
+ Value.Offset = lsi->second.second;
Value.Addend = lsi->second.second + Addend;
} else {
// Search for the symbol in the global symbol table
@@ -819,6 +886,7 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
gsi = GlobalSymbolTable.find(TargetName.data());
if (gsi != GlobalSymbolTable.end()) {
Value.SectionID = gsi->second.first;
+ Value.Offset = gsi->second.second;
Value.Addend = gsi->second.second + Addend;
} else {
switch (SymType) {
@@ -841,9 +909,17 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
Value.Addend = Addend;
break;
}
+ case SymbolRef::ST_Data:
case SymbolRef::ST_Unknown: {
Value.SymbolName = TargetName.data();
Value.Addend = Addend;
+
+ // Absolute relocations will have a zero symbol ID (STN_UNDEF), which
+ // will manifest here as a NULL symbol name.
+ // We can set this as a valid (but empty) symbol name, and rely
+ // on addRelocationForSymbol to handle this.
+ if (!Value.SymbolName)
+ Value.SymbolName = "";
break;
}
default:
@@ -955,8 +1031,8 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
// Look up for existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
- resolveRelocation(Section, Offset,
- (uint64_t)Section.Address + i->second, RelType, 0);
+ RelocationEntry RE(SectionID, Offset, RelType, i->second);
+ addRelocationForSection(RE, SectionID);
DEBUG(dbgs() << " Stub function found\n");
} else {
// Create a new stub function.
@@ -981,9 +1057,8 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
addRelocationForSection(RELo, Value.SectionID);
}
- resolveRelocation(Section, Offset,
- (uint64_t)Section.Address + Section.StubOffset,
- RelType, 0);
+ RelocationEntry RE(SectionID, Offset, RelType, Section.StubOffset);
+ addRelocationForSection(RE, SectionID);
Section.StubOffset += getMaxStubSize();
}
} else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) {
@@ -1069,7 +1144,10 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
// Extra check to avoid relocation against empty symbols (usually
// the R_PPC64_TOC).
- if (Value.SymbolName && !TargetName.empty())
+ if (SymType != SymbolRef::ST_Unknown && TargetName.empty())
+ Value.SymbolName = NULL;
+
+ if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
@@ -1121,8 +1199,67 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
ELF::R_390_PC32DBL, Addend);
else
resolveRelocation(Section, Offset, StubAddress, RelType, Addend);
+ } else if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_PLT32) {
+ // The way the PLT relocations normally work is that the linker allocates the
+ // PLT and this relocation makes a PC-relative call into the PLT. The PLT
+ // entry will then jump to an address provided by the GOT. On first call, the
+ // GOT address will point back into PLT code that resolves the symbol. After
+ // the first call, the GOT entry points to the actual function.
+ //
+ // For local functions we're ignoring all of that here and just replacing
+ // the PLT32 relocation type with PC32, which will translate the relocation
+ // into a PC-relative call directly to the function. For external symbols we
+ // can't be sure the function will be within 2^32 bytes of the call site, so
+ // we need to create a stub, which calls into the GOT. This case is
+ // equivalent to the usual PLT implementation except that we use the stub
+ // mechanism in RuntimeDyld (which puts stubs at the end of the section)
+ // rather than allocating a PLT section.
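
To make the stub mechanics concrete (editorial sketch with an assumed displacement): the stub emitted by createStubFunction for x86_64 is the two-byte FF 25 opcode, jmp *disp32(%rip), and the GOTPCREL relocation added at StubOffset + 2 with addend -4 makes that disp32 land exactly on the GOT slot.

#include <stdint.h>
#include <string.h>

// Sketch: encode the 6-byte x86_64 stub and patch in a hypothetical 32-bit
// displacement to the GOT slot holding the callee's address.
static void encodeX86_64Stub(uint8_t *StubAddr, int32_t DispToGOTSlot) {
  StubAddr[0] = 0xFF;                      // jmp
  StubAddr[1] = 0x25;                      // RIP-relative indirect form
  memcpy(StubAddr + 2, &DispToGOTSlot, 4); // disp32, relative to StubAddr + 6
  // At run time the CPU loads the target address from
  // *(StubAddr + 6 + DispToGOTSlot) -- i.e. from the GOT entry -- and jumps
  // there, which is what the GOTPCREL fixup with addend -4 arranges.
}
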
+ if (Value.SymbolName) {
+ // This is a call to an external function.
+ // Look for an existing stub.
+ SectionEntry &Section = Sections[SectionID];
+ StubMap::const_iterator i = Stubs.find(Value);
+ uintptr_t StubAddress;
+ if (i != Stubs.end()) {
+ StubAddress = uintptr_t(Section.Address) + i->second;
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function (equivalent to a PLT entry).
+ DEBUG(dbgs() << " Create a new stub function\n");
+
+ uintptr_t BaseAddress = uintptr_t(Section.Address);
+ uintptr_t StubAlignment = getStubAlignment();
+ StubAddress = (BaseAddress + Section.StubOffset +
+ StubAlignment - 1) & -StubAlignment;
+ unsigned StubOffset = StubAddress - BaseAddress;
+ Stubs[Value] = StubOffset;
+ createStubFunction((uint8_t *)StubAddress);
+
+ // Create a GOT entry for the external function.
+ GOTEntries.push_back(Value);
+
+ // Make our stub function a relative call to the GOT entry.
+ RelocationEntry RE(SectionID, StubOffset + 2,
+ ELF::R_X86_64_GOTPCREL, -4);
+ addRelocationForSymbol(RE, Value.SymbolName);
+
+ // Bump our stub offset counter
+ Section.StubOffset = StubOffset + getMaxStubSize();
+ }
+
+ // Make the target call a call into the stub table.
+ resolveRelocation(Section, Offset, StubAddress,
+ ELF::R_X86_64_PC32, Addend);
+ } else {
+ RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend,
+ Value.Offset);
+ addRelocationForSection(RE, Value.SectionID);
+ }
} else {
- RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
+ if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) {
+ GOTEntries.push_back(Value);
+ }
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, Value.Offset);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
@@ -1130,6 +1267,137 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
}
}
+void RuntimeDyldELF::updateGOTEntries(StringRef Name, uint64_t Addr) {
+
+ SmallVectorImpl<std::pair<SID, GOTRelocations> >::iterator it;
+ SmallVectorImpl<std::pair<SID, GOTRelocations> >::iterator end = GOTs.end();
+
+ for (it = GOTs.begin(); it != end; ++it) {
+ GOTRelocations &GOTEntries = it->second;
+ for (int i = 0, e = GOTEntries.size(); i != e; ++i) {
+ if (GOTEntries[i].SymbolName != 0 && GOTEntries[i].SymbolName == Name) {
+ GOTEntries[i].Offset = Addr;
+ }
+ }
+ }
+}
+
+size_t RuntimeDyldELF::getGOTEntrySize() {
+ // We don't use the GOT in all of these cases, but it's essentially free
+ // to put them all here.
+ size_t Result = 0;
+ switch (Arch) {
+ case Triple::x86_64:
+ case Triple::aarch64:
+ case Triple::ppc64:
+ case Triple::ppc64le:
+ case Triple::systemz:
+ Result = sizeof(uint64_t);
+ break;
+ case Triple::x86:
+ case Triple::arm:
+ case Triple::thumb:
+ case Triple::mips:
+ case Triple::mipsel:
+ Result = sizeof(uint32_t);
+ break;
+ default: llvm_unreachable("Unsupported CPU type!");
+ }
+ return Result;
+}
+
+uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress,
+ uint64_t Offset) {
+
+ const size_t GOTEntrySize = getGOTEntrySize();
+
+ SmallVectorImpl<std::pair<SID, GOTRelocations> >::const_iterator it;
+ SmallVectorImpl<std::pair<SID, GOTRelocations> >::const_iterator end = GOTs.end();
+
+ int GOTIndex = -1;
+ for (it = GOTs.begin(); it != end; ++it) {
+ SID GOTSectionID = it->first;
+ const GOTRelocations &GOTEntries = it->second;
+
+ // Find the matching entry in our vector.
+ uint64_t SymbolOffset = 0;
+ for (int i = 0, e = GOTEntries.size(); i != e; ++i) {
+ if (GOTEntries[i].SymbolName == 0) {
+ if (getSectionLoadAddress(GOTEntries[i].SectionID) == LoadAddress &&
+ GOTEntries[i].Offset == Offset) {
+ GOTIndex = i;
+ SymbolOffset = GOTEntries[i].Offset;
+ break;
+ }
+ } else {
+ // GOT entries for external symbols use the addend as the address when
+ // the external symbol has been resolved.
+ if (GOTEntries[i].Offset == LoadAddress) {
+ GOTIndex = i;
+ // Don't use the Addend here. The relocation handler will use it.
+ break;
+ }
+ }
+ }
+
+ if (GOTIndex != -1) {
+ if (GOTEntrySize == sizeof(uint64_t)) {
+ uint64_t *LocalGOTAddr = (uint64_t*)getSectionAddress(GOTSectionID);
+ // Fill in this entry with the address of the symbol being referenced.
+ LocalGOTAddr[GOTIndex] = LoadAddress + SymbolOffset;
+ } else {
+ uint32_t *LocalGOTAddr = (uint32_t*)getSectionAddress(GOTSectionID);
+ // Fill in this entry with the address of the symbol being referenced.
+ LocalGOTAddr[GOTIndex] = (uint32_t)(LoadAddress + SymbolOffset);
+ }
+
+ // Calculate the load address of this entry
+ return getSectionLoadAddress(GOTSectionID) + (GOTIndex * GOTEntrySize);
+ }
+ }
+
+ assert(GOTIndex != -1 && "Unable to find requested GOT entry.");
+ return 0;
+}
+
+void RuntimeDyldELF::finalizeLoad(ObjSectionToIDMap &SectionMap) {
+ // If necessary, allocate the global offset table
+ if (MemMgr) {
+ // Allocate the GOT if necessary
+ size_t numGOTEntries = GOTEntries.size();
+ if (numGOTEntries != 0) {
+ // Allocate memory for the section
+ unsigned SectionID = Sections.size();
+ size_t TotalSize = numGOTEntries * getGOTEntrySize();
+ uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, getGOTEntrySize(),
+ SectionID, ".got", false);
+ if (!Addr)
+ report_fatal_error("Unable to allocate memory for GOT!");
+
+ GOTs.push_back(std::make_pair(SectionID, GOTEntries));
+ Sections.push_back(SectionEntry(".got", Addr, TotalSize, 0));
+ // For now, initialize all GOT entries to zero. We'll fill them in as
+ // needed when GOT-based relocations are applied.
+ memset(Addr, 0, TotalSize);
+ }
+ }
+ else {
+ report_fatal_error("Unable to allocate memory for GOT!");
+ }
+
+ // Look for and record the EH frame section.
+ ObjSectionToIDMap::iterator i, e;
+ for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
+ const SectionRef &Section = i->first;
+ StringRef Name;
+ Section.getName(Name);
+ if (Name == ".eh_frame") {
+ UnregisteredEHFrameSections.push_back(i->second);
+ break;
+ }
+ }
+}
+
bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const {
if (Buffer->getBufferSize() < strlen(ELF::ElfMagic))
return false;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 794c7ec..3adf827 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -15,6 +15,7 @@
#define LLVM_RUNTIME_DYLD_ELF_H
#include "RuntimeDyldImpl.h"
+#include "llvm/ADT/DenseMap.h"
using namespace llvm;
@@ -35,13 +36,15 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
uint64_t Offset,
uint64_t Value,
uint32_t Type,
- int64_t Addend);
+ int64_t Addend,
+ uint64_t SymOffset=0);
void resolveX86_64Relocation(const SectionEntry &Section,
uint64_t Offset,
uint64_t Value,
uint32_t Type,
- int64_t Addend);
+ int64_t Addend,
+ uint64_t SymOffset);
void resolveX86Relocation(const SectionEntry &Section,
uint64_t Offset,
@@ -79,13 +82,55 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
uint32_t Type,
int64_t Addend);
+ unsigned getMaxStubSize() {
+ if (Arch == Triple::aarch64)
+ return 20; // movz; movk; movk; movk; br
+ if (Arch == Triple::arm || Arch == Triple::thumb)
+ return 8; // 32-bit instruction and 32-bit address
+ else if (Arch == Triple::mipsel || Arch == Triple::mips)
+ return 16;
+ else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le)
+ return 44;
+ else if (Arch == Triple::x86_64)
+ return 6; // 2-byte jmp instruction + 32-bit relative address
+ else if (Arch == Triple::systemz)
+ return 16;
+ else
+ return 0;
+ }
+
+ unsigned getStubAlignment() {
+ if (Arch == Triple::systemz)
+ return 8;
+ else
+ return 1;
+ }
+
uint64_t findPPC64TOC() const;
void findOPDEntrySection(ObjectImage &Obj,
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
+ uint64_t findGOTEntry(uint64_t LoadAddr, uint64_t Offset);
+ size_t getGOTEntrySize();
+
+ virtual void updateGOTEntries(StringRef Name, uint64_t Addr);
+
+  // Relocation entries for symbols whose position-independent offset is
+ // updated in a global offset table.
+ typedef SmallVector<RelocationValueRef, 2> GOTRelocations;
+ GOTRelocations GOTEntries; // List of entries requiring finalization.
+ SmallVector<std::pair<SID, GOTRelocations>, 8> GOTs; // Allocated tables.
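+  // Rough life cycle (a sketch of what the code in RuntimeDyldELF.cpp does):
+  // GOT-based relocations accumulate RelocationValueRef records in GOTEntries
+  // while an object is processed; finalizeLoad() then allocates a ".got" data
+  // section of numGOTEntries * getGOTEntrySize() bytes and remembers the
+  // (SectionID, entries) pairing in GOTs; findGOTEntry() later fills in the
+  // matching slot and returns its load address for the relocation to use.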
+
+ // When a module is loaded we save the SectionID of the EH frame section
+ // in a table until we receive a request to register all unregistered
+ // EH frame sections with the memory manager.
+ SmallVector<SID, 2> UnregisteredEHFrameSections;
+ SmallVector<SID, 2> RegisteredEHFrameSections;
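+  // Expected flow, roughly: finalizeLoad() records the .eh_frame SectionID in
+  // UnregisteredEHFrameSections after an object is loaded; registerEHFrames()
+  // later hands each recorded frame to the memory manager and moves its ID to
+  // RegisteredEHFrameSections so that deregisterEHFrames() can undo the
+  // registration.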
+
public:
- RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+ RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm)
+ {}
virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value);
virtual void processRelocationRef(unsigned SectionID,
@@ -96,7 +141,9 @@ public:
StubMap &Stubs);
virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
- virtual StringRef getEHFrameSection();
+ virtual void registerEHFrames();
+ virtual void deregisterEHFrames();
+ virtual void finalizeLoad(ObjSectionToIDMap &SectionMap);
virtual ~RuntimeDyldELF();
};
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 14d945b..3014b30 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -25,6 +25,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
@@ -80,14 +81,18 @@ public:
unsigned SectionID;
/// Offset - offset into the section.
- uintptr_t Offset;
+ uint64_t Offset;
/// RelType - relocation type.
uint32_t RelType;
/// Addend - the relocation addend encoded in the instruction itself. Also
/// used to make a relocation section relative instead of symbol relative.
- intptr_t Addend;
+ int64_t Addend;
+
+ /// SymOffset - Section offset of the relocation entry's symbol (used for GOT
+ /// lookup).
+ uint64_t SymOffset;
/// True if this is a PCRel relocation (MachO specific).
bool IsPCRel;
@@ -97,26 +102,39 @@ public:
RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend)
: SectionID(id), Offset(offset), RelType(type), Addend(addend),
- IsPCRel(false), Size(0) {}
+ SymOffset(0), IsPCRel(false), Size(0) {}
+
+ RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
+ uint64_t symoffset)
+ : SectionID(id), Offset(offset), RelType(type), Addend(addend),
+ SymOffset(symoffset), IsPCRel(false), Size(0) {}
RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
bool IsPCRel, unsigned Size)
: SectionID(id), Offset(offset), RelType(type), Addend(addend),
- IsPCRel(IsPCRel), Size(Size) {}
+ SymOffset(0), IsPCRel(IsPCRel), Size(Size) {}
};
class RelocationValueRef {
public:
unsigned SectionID;
- intptr_t Addend;
+ uint64_t Offset;
+ int64_t Addend;
const char *SymbolName;
- RelocationValueRef(): SectionID(0), Addend(0), SymbolName(0) {}
+ RelocationValueRef(): SectionID(0), Offset(0), Addend(0), SymbolName(0) {}
inline bool operator==(const RelocationValueRef &Other) const {
- return std::memcmp(this, &Other, sizeof(RelocationValueRef)) == 0;
+ return SectionID == Other.SectionID && Offset == Other.Offset &&
+ Addend == Other.Addend && SymbolName == Other.SymbolName;
}
inline bool operator <(const RelocationValueRef &Other) const {
- return std::memcmp(this, &Other, sizeof(RelocationValueRef)) < 0;
+ if (SectionID != Other.SectionID)
+ return SectionID < Other.SectionID;
+ if (Offset != Other.Offset)
+ return Offset < Other.Offset;
+ if (Addend != Other.Addend)
+ return Addend < Other.Addend;
+ return SymbolName < Other.SymbolName;
}
};
@@ -130,6 +148,9 @@ protected:
typedef SmallVector<SectionEntry, 64> SectionList;
SectionList Sections;
+ typedef unsigned SID; // Type for SectionIDs
+ #define RTDYLD_INVALID_SECTION_ID ((SID)(-1))
+
// Keep a map of sections from object file to the SectionID which
// references it.
typedef std::map<SectionRef, unsigned> ObjSectionToIDMap;
@@ -164,30 +185,22 @@ protected:
typedef std::map<RelocationValueRef, uintptr_t> StubMap;
Triple::ArchType Arch;
-
- inline unsigned getMaxStubSize() {
- if (Arch == Triple::aarch64)
- return 20; // movz; movk; movk; movk; br
- if (Arch == Triple::arm || Arch == Triple::thumb)
- return 8; // 32-bit instruction and 32-bit address
- else if (Arch == Triple::mipsel || Arch == Triple::mips)
- return 16;
- else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le)
- return 44;
- else if (Arch == Triple::x86_64)
- return 8; // GOT
- else if (Arch == Triple::systemz)
- return 16;
- else
- return 0;
- }
-
- inline unsigned getStubAlignment() {
- if (Arch == Triple::systemz)
- return 8;
- else
- return 1;
- }
+ bool IsTargetLittleEndian;
+
+ // This mutex prevents simultaneously loading objects from two different
+ // threads. This keeps us from having to protect individual data structures
+ // and guarantees that section allocation requests to the memory manager
+ // won't be interleaved between modules. It is also used in mapSectionAddress
+ // and resolveRelocations to protect write access to internal data structures.
+ //
+  // loadObject may be called on the same thread during the handling of
+ // processRelocations, and that's OK. The handling of the relocation lists
+ // is written in such a way as to work correctly if new elements are added to
+ // the end of the list while the list is being processed.
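+  //
+  // A rough usage sketch (illustrative only, not part of this header): each
+  // entry point that mutates loader state would begin with
+  //   MutexGuard locked(lock);   // llvm/Support/MutexGuard.h
+  // so that section allocation and the relocation lists are only touched
+  // while the lock is held.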
+ sys::Mutex lock;
+
+ virtual unsigned getMaxStubSize() = 0;
+ virtual unsigned getStubAlignment() = 0;
bool HasError;
std::string ErrorStr;
@@ -208,14 +221,14 @@ protected:
}
void writeInt16BE(uint8_t *Addr, uint16_t Value) {
- if (sys::IsLittleEndianHost)
+ if (IsTargetLittleEndian)
Value = sys::SwapByteOrder(Value);
*Addr = (Value >> 8) & 0xFF;
*(Addr+1) = Value & 0xFF;
}
void writeInt32BE(uint8_t *Addr, uint32_t Value) {
- if (sys::IsLittleEndianHost)
+ if (IsTargetLittleEndian)
Value = sys::SwapByteOrder(Value);
*Addr = (Value >> 24) & 0xFF;
*(Addr+1) = (Value >> 16) & 0xFF;
@@ -224,7 +237,7 @@ protected:
}
void writeInt64BE(uint8_t *Addr, uint64_t Value) {
- if (sys::IsLittleEndianHost)
+ if (IsTargetLittleEndian)
Value = sys::SwapByteOrder(Value);
*Addr = (Value >> 56) & 0xFF;
*(Addr+1) = (Value >> 48) & 0xFF;
@@ -292,6 +305,11 @@ protected:
/// \brief Resolve relocations to external symbols.
void resolveExternalSymbols();
+
+ /// \brief Update GOT entries for external symbols.
+ // The base class does nothing. ELF overrides this.
+ virtual void updateGOTEntries(StringRef Name, uint64_t Addr) {}
+
virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
public:
RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
@@ -303,18 +321,20 @@ public:
void *getSymbolAddress(StringRef Name) {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
- if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end())
+ SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name);
+ if (pos == GlobalSymbolTable.end())
return 0;
- SymbolLoc Loc = GlobalSymbolTable.lookup(Name);
+ SymbolLoc Loc = pos->second;
return getSectionAddress(Loc.first) + Loc.second;
}
uint64_t getSymbolLoadAddress(StringRef Name) {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
- if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end())
+ SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name);
+ if (pos == GlobalSymbolTable.end())
return 0;
- SymbolLoc Loc = GlobalSymbolTable.lookup(Name);
+ SymbolLoc Loc = pos->second;
return getSectionLoadAddress(Loc.first) + Loc.second;
}
@@ -335,7 +355,11 @@ public:
virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const = 0;
- virtual StringRef getEHFrameSection();
+ virtual void registerEHFrames();
+
+ virtual void deregisterEHFrames();
+
+ virtual void finalizeLoad(ObjSectionToIDMap &SectionMap) {}
};
} // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 0384b32..5b92867 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -55,35 +55,80 @@ static intptr_t computeDelta(SectionEntry *A, SectionEntry *B) {
return ObjDistance - MemDistance;
}
-StringRef RuntimeDyldMachO::getEHFrameSection() {
- SectionEntry *Text = NULL;
- SectionEntry *EHFrame = NULL;
- SectionEntry *ExceptTab = NULL;
- for (int i = 0, e = Sections.size(); i != e; ++i) {
- if (Sections[i].Name == "__eh_frame")
- EHFrame = &Sections[i];
- else if (Sections[i].Name == "__text")
- Text = &Sections[i];
- else if (Sections[i].Name == "__gcc_except_tab")
- ExceptTab = &Sections[i];
+void RuntimeDyldMachO::registerEHFrames() {
+
+ if (!MemMgr)
+ return;
+ for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) {
+ EHFrameRelatedSections &SectionInfo = UnregisteredEHFrameSections[i];
+ if (SectionInfo.EHFrameSID == RTDYLD_INVALID_SECTION_ID ||
+ SectionInfo.TextSID == RTDYLD_INVALID_SECTION_ID)
+ continue;
+ SectionEntry *Text = &Sections[SectionInfo.TextSID];
+ SectionEntry *EHFrame = &Sections[SectionInfo.EHFrameSID];
+ SectionEntry *ExceptTab = NULL;
+ if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID)
+ ExceptTab = &Sections[SectionInfo.ExceptTabSID];
+
+ intptr_t DeltaForText = computeDelta(Text, EHFrame);
+ intptr_t DeltaForEH = 0;
+ if (ExceptTab)
+ DeltaForEH = computeDelta(ExceptTab, EHFrame);
+
+ unsigned char *P = EHFrame->Address;
+ unsigned char *End = P + EHFrame->Size;
+ do {
+ P = processFDE(P, DeltaForText, DeltaForEH);
+ } while(P != End);
+
+ MemMgr->registerEHFrames(EHFrame->Address,
+ EHFrame->LoadAddress,
+ EHFrame->Size);
}
- if (Text == NULL || EHFrame == NULL)
- return StringRef();
-
- intptr_t DeltaForText = computeDelta(Text, EHFrame);
- intptr_t DeltaForEH = 0;
- if (ExceptTab)
- DeltaForEH = computeDelta(ExceptTab, EHFrame);
-
- unsigned char *P = EHFrame->Address;
- unsigned char *End = P + EHFrame->Size;
- do {
- P = processFDE(P, DeltaForText, DeltaForEH);
- } while(P != End);
+ UnregisteredEHFrameSections.clear();
+}
- return StringRef((char*)EHFrame->Address, EHFrame->Size);
+void RuntimeDyldMachO::finalizeLoad(ObjSectionToIDMap &SectionMap) {
+ unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
+ unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
+ unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
+ ObjSectionToIDMap::iterator i, e;
+ for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
+ const SectionRef &Section = i->first;
+ StringRef Name;
+ Section.getName(Name);
+ if (Name == "__eh_frame")
+ EHFrameSID = i->second;
+ else if (Name == "__text")
+ TextSID = i->second;
+ else if (Name == "__gcc_except_tab")
+ ExceptTabSID = i->second;
+ }
+ UnregisteredEHFrameSections.push_back(EHFrameRelatedSections(EHFrameSID,
+ TextSID,
+ ExceptTabSID));
}
+// The target location for the relocation is described by RE.SectionID and
+// RE.Offset. RE.SectionID can be used to find the SectionEntry. Each
+// SectionEntry has three members describing its location.
+// SectionEntry::Address is the address at which the section has been loaded
+// into memory in the current (host) process. SectionEntry::LoadAddress is the
+// address that the section will have in the target process.
+// SectionEntry::ObjAddress is the address of the bits for this section in the
+// original emitted object image (also in the current address space).
+//
+// Relocations will be applied as if the section were loaded at
+// SectionEntry::LoadAddress, but they will be applied at an address based
+// on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer to
+// Target memory contents if they are required for value calculations.
+//
+// The Value parameter here is the load address of the symbol for the
+// relocation to be applied. For relocations which refer to symbols in the
+// current object Value will be the LoadAddress of the section in which
+// the symbol resides (RE.Addend provides additional information about the
+// symbol location). For external symbols, Value will be the address of the
+// symbol in the target address space.
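+//
+// As a purely illustrative example (hypothetical numbers): suppose a section
+// was copied into the host-side JIT buffer at Address 0x200000 and will run
+// in the target at LoadAddress 0x400000. A relocation with RE.Offset 0x10 is
+// then written into host memory at 0x200010, but the value stored there is
+// computed as if the fixed-up instruction lived at 0x400010, using Value (and
+// RE.Addend) for the symbol it refers to.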
void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE,
uint64_t Value) {
const SectionEntry &Section = Sections[RE.SectionID];
@@ -160,7 +205,7 @@ bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress,
switch (Type) {
default:
llvm_unreachable("Invalid relocation type!");
- case macho::RIT_Vanilla: {
+ case MachO::GENERIC_RELOC_VANILLA: {
uint8_t *p = LocalAddress;
uint64_t ValueToWrite = Value + Addend;
for (unsigned i = 0; i < Size; ++i) {
@@ -169,9 +214,9 @@ bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress,
}
return false;
}
- case macho::RIT_Difference:
- case macho::RIT_Generic_LocalDifference:
- case macho::RIT_Generic_PreboundLazyPointer:
+ case MachO::GENERIC_RELOC_SECTDIFF:
+ case MachO::GENERIC_RELOC_LOCAL_SECTDIFF:
+ case MachO::GENERIC_RELOC_PB_LA_PTR:
return Error("Relocation type not implemented yet!");
}
}
@@ -193,12 +238,12 @@ bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress,
switch(Type) {
default:
llvm_unreachable("Invalid relocation type!");
- case macho::RIT_X86_64_Signed1:
- case macho::RIT_X86_64_Signed2:
- case macho::RIT_X86_64_Signed4:
- case macho::RIT_X86_64_Signed:
- case macho::RIT_X86_64_Unsigned:
- case macho::RIT_X86_64_Branch: {
+ case MachO::X86_64_RELOC_SIGNED_1:
+ case MachO::X86_64_RELOC_SIGNED_2:
+ case MachO::X86_64_RELOC_SIGNED_4:
+ case MachO::X86_64_RELOC_SIGNED:
+ case MachO::X86_64_RELOC_UNSIGNED:
+ case MachO::X86_64_RELOC_BRANCH: {
Value += Addend;
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
@@ -209,10 +254,10 @@ bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress,
}
return false;
}
- case macho::RIT_X86_64_GOTLoad:
- case macho::RIT_X86_64_GOT:
- case macho::RIT_X86_64_Subtractor:
- case macho::RIT_X86_64_TLV:
+ case MachO::X86_64_RELOC_GOT_LOAD:
+ case MachO::X86_64_RELOC_GOT:
+ case MachO::X86_64_RELOC_SUBTRACTOR:
+ case MachO::X86_64_RELOC_TLV:
return Error("Relocation type not implemented yet!");
}
}
@@ -237,7 +282,7 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
switch(Type) {
default:
llvm_unreachable("Invalid relocation type!");
- case macho::RIT_Vanilla: {
+ case MachO::ARM_RELOC_VANILLA: {
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
uint8_t *p = (uint8_t*)LocalAddress;
@@ -247,7 +292,7 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
}
break;
}
- case macho::RIT_ARM_Branch24Bit: {
+ case MachO::ARM_RELOC_BR24: {
// Mask the value into the target address. We know instructions are
// 32-bit aligned, so we can do it all at once.
uint32_t *p = (uint32_t*)LocalAddress;
@@ -263,14 +308,14 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
*p = (*p & ~0xffffff) | Value;
break;
}
- case macho::RIT_ARM_ThumbBranch22Bit:
- case macho::RIT_ARM_ThumbBranch32Bit:
- case macho::RIT_ARM_Half:
- case macho::RIT_ARM_HalfDifference:
- case macho::RIT_Pair:
- case macho::RIT_Difference:
- case macho::RIT_ARM_LocalDifference:
- case macho::RIT_ARM_PreboundLazyPointer:
+ case MachO::ARM_THUMB_RELOC_BR22:
+ case MachO::ARM_THUMB_32BIT_BRANCH:
+ case MachO::ARM_RELOC_HALF:
+ case MachO::ARM_RELOC_HALF_SECTDIFF:
+ case MachO::ARM_RELOC_PAIR:
+ case MachO::ARM_RELOC_SECTDIFF:
+ case MachO::ARM_RELOC_LOCAL_SECTDIFF:
+ case MachO::ARM_RELOC_PB_LA_PTR:
return Error("Relocation type not implemented yet!");
}
return false;
@@ -284,9 +329,19 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID,
StubMap &Stubs) {
const ObjectFile *OF = Obj.getObjectFile();
const MachOObjectFile *MachO = static_cast<const MachOObjectFile*>(OF);
- macho::RelocationEntry RE = MachO->getRelocation(RelI.getRawDataRefImpl());
+ MachO::any_relocation_info RE= MachO->getRelocation(RelI.getRawDataRefImpl());
uint32_t RelType = MachO->getAnyRelocationType(RE);
+
+ // FIXME: Properly handle scattered relocations.
+ // For now, optimistically skip these: they can often be ignored, as
+ // the static linker will already have applied the relocation, and it
+ // only needs to be reapplied if symbols move relative to one another.
+ // Note: This will fail horribly where the relocations *do* need to be
+ // applied, but that was already the case.
+ if (MachO->isRelocationScattered(RE))
+ return;
+
RelocationValueRef Value;
SectionEntry &Section = Sections[SectionID];
@@ -329,7 +384,8 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID,
Value.Addend = Addend - Addr;
}
- if (Arch == Triple::x86_64 && RelType == macho::RIT_X86_64_GOT) {
+ if (Arch == Triple::x86_64 && (RelType == MachO::X86_64_RELOC_GOT ||
+ RelType == MachO::X86_64_RELOC_GOT_LOAD)) {
assert(IsPCRel);
assert(Size == 2);
StubMap::const_iterator i = Stubs.find(Value);
@@ -340,8 +396,7 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID,
Stubs[Value] = Section.StubOffset;
uint8_t *GOTEntry = Section.Address + Section.StubOffset;
RelocationEntry RE(SectionID, Section.StubOffset,
- macho::RIT_X86_64_Unsigned, Value.Addend - 4, false,
- 3);
+ MachO::X86_64_RELOC_UNSIGNED, 0, false, 3);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
@@ -350,9 +405,9 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID,
Addr = GOTEntry;
}
resolveRelocation(Section, Offset, (uint64_t)Addr,
- macho::RIT_X86_64_Unsigned, 4, true, 2);
+ MachO::X86_64_RELOC_UNSIGNED, Value.Addend, true, 2);
} else if (Arch == Triple::arm &&
- (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) {
+ (RelType & 0xf) == MachO::ARM_RELOC_BR24) {
// This is an ARM branch relocation, need to use a stub function.
// Look up for existing stub.
@@ -367,7 +422,7 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID,
uint8_t *StubTargetAddr = createStubFunction(Section.Address +
Section.StubOffset);
RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
- macho::RIT_Vanilla, Value.Addend);
+ MachO::GENERIC_RELOC_VANILLA, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index df8d3bb..bbf6aa9 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -54,6 +54,35 @@ class RuntimeDyldMachO : public RuntimeDyldImpl {
int64_t Addend,
bool isPCRel,
unsigned Size);
+
+ unsigned getMaxStubSize() {
+ if (Arch == Triple::arm || Arch == Triple::thumb)
+ return 8; // 32-bit instruction and 32-bit address
+ else if (Arch == Triple::x86_64)
+ return 8; // GOT entry
+ else
+ return 0;
+ }
+
+ unsigned getStubAlignment() {
+ return 1;
+ }
+
+ struct EHFrameRelatedSections {
+ EHFrameRelatedSections() : EHFrameSID(RTDYLD_INVALID_SECTION_ID),
+ TextSID(RTDYLD_INVALID_SECTION_ID),
+ ExceptTabSID(RTDYLD_INVALID_SECTION_ID) {}
+ EHFrameRelatedSections(SID EH, SID T, SID Ex)
+ : EHFrameSID(EH), TextSID(T), ExceptTabSID(Ex) {}
+ SID EHFrameSID;
+ SID TextSID;
+ SID ExceptTabSID;
+ };
+
+ // When a module is loaded we save the SectionID of the EH frame section
+ // in a table until we receive a request to register all unregistered
+ // EH frame sections with the memory manager.
+ SmallVector<EHFrameRelatedSections, 2> UnregisteredEHFrameSections;
public:
RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
@@ -65,7 +94,8 @@ public:
const SymbolTableMap &Symbols,
StubMap &Stubs);
virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
- virtual StringRef getEHFrameSection();
+ virtual void registerEHFrames();
+ virtual void finalizeLoad(ObjSectionToIDMap &SectionMap);
};
} // end namespace llvm
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 558d8b3..9b7d348 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -91,7 +91,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
// FIXME: non-iOS ARM FastISel is broken with MCJIT.
if (UseMCJIT &&
TheTriple.getArch() == Triple::arm &&
- TheTriple.getOS() != Triple::IOS &&
+ !TheTriple.isiOS() &&
OptLevel == CodeGenOpt::None) {
OptLevel = CodeGenOpt::Less;
}
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index f275305..7decffd 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -74,6 +74,8 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
default: Out << "cc" << cc; break;
case CallingConv::Fast: Out << "fastcc"; break;
case CallingConv::Cold: Out << "coldcc"; break;
+ case CallingConv::WebKit_JS: Out << "webkit_jscc"; break;
+ case CallingConv::AnyReg: Out << "anyregcc"; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
@@ -1394,9 +1396,6 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
case GlobalValue::InternalLinkage: Out << "internal "; break;
case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
- case GlobalValue::LinkOnceODRAutoHideLinkage:
- Out << "linkonce_odr_auto_hide ";
- break;
case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
case GlobalValue::CommonLinkage: Out << "common "; break;
@@ -1647,6 +1646,10 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << " align " << F->getAlignment();
if (F->hasGC())
Out << " gc \"" << F->getGC() << '"';
+ if (F->hasPrefixData()) {
+ Out << " prefix ";
+ writeOperand(F->getPrefixData(), true);
+ }
if (F->isDeclaration()) {
Out << '\n';
} else {
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 9da3f96..ea954ac 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -94,6 +94,7 @@ public:
/// attribute entries, which are for target-dependent attributes.
class EnumAttributeImpl : public AttributeImpl {
+ virtual void anchor();
Attribute::AttrKind Kind;
protected:
@@ -108,6 +109,7 @@ public:
};
class AlignAttributeImpl : public EnumAttributeImpl {
+ virtual void anchor();
unsigned Align;
public:
@@ -122,6 +124,7 @@ public:
};
class StringAttributeImpl : public AttributeImpl {
+ virtual void anchor();
std::string Kind;
std::string Val;
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index f466d16..0f2b7a0 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -196,6 +196,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "noreturn";
if (hasAttribute(Attribute::NoUnwind))
return "nounwind";
+ if (hasAttribute(Attribute::OptimizeNone))
+ return "optnone";
if (hasAttribute(Attribute::OptimizeForSize))
return "optsize";
if (hasAttribute(Attribute::ReadNone))
@@ -284,7 +286,11 @@ bool Attribute::operator<(Attribute A) const {
// AttributeImpl Definition
//===----------------------------------------------------------------------===//
+// Pin the vtables to this file.
AttributeImpl::~AttributeImpl() {}
+void EnumAttributeImpl::anchor() {}
+void AlignAttributeImpl::anchor() {}
+void StringAttributeImpl::anchor() {}
bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
if (isStringAttribute()) return false;
@@ -381,6 +387,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::Returned: return 1ULL << 39;
case Attribute::Cold: return 1ULL << 40;
case Attribute::Builtin: return 1ULL << 41;
+ case Attribute::OptimizeNone: return 1ULL << 42;
}
llvm_unreachable("Unsupported attribute type");
}
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index a4f5289..d12bf7b 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/AutoUpgrade.h"
+#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -88,6 +89,20 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
+ case 'o':
+ // We only need to change the name to match the mangling including the
+ // address space.
+ if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
+ Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
+ if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::objectsize, Tys);
+ return true;
+ }
+ }
+ break;
+
case 'x': {
if (Name.startswith("x86.sse2.pcmpeq.") ||
Name.startswith("x86.sse2.pcmpgt.") ||
@@ -97,6 +112,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx.movnt.dq.256" ||
Name == "x86.avx.movnt.pd.256" ||
Name == "x86.avx.movnt.ps.256" ||
+ Name == "x86.sse42.crc32.64.8" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = 0;
return true;
@@ -257,6 +273,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
CI->getArgOperand(1), Builder.getInt8(Imm));
+ } else if (Name == "llvm.x86.sse42.crc32.64.8") {
+ Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_sse42_crc32_32_8);
+ Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
+ Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
+ Rep = Builder.CreateZExt(Rep, CI->getType(), "");
} else {
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
if (Name == "llvm.x86.avx.vpermil.pd.256")
@@ -317,6 +339,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
return;
+ case Intrinsic::objectsize:
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn,
+ CI->getArgOperand(0),
+ CI->getArgOperand(1),
+ Name));
+ CI->eraseFromParent();
+ return;
+
case Intrinsic::arm_neon_vclz: {
// Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
@@ -391,3 +421,81 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
}
}
+void llvm::UpgradeInstWithTBAATag(Instruction *I) {
+ MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
+ assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
+ // Check if the tag uses struct-path aware TBAA format.
+ if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
+ return;
+
+ if (MD->getNumOperands() == 3) {
+ Value *Elts[] = {
+ MD->getOperand(0),
+ MD->getOperand(1)
+ };
+ MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
+ // Create a MDNode <ScalarType, ScalarType, offset 0, const>
+ Value *Elts2[] = {
+ ScalarType, ScalarType,
+ Constant::getNullValue(Type::getInt64Ty(I->getContext())),
+ MD->getOperand(2)
+ };
+ I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
+ } else {
+ // Create a MDNode <MD, MD, offset 0>
+ Value *Elts[] = {MD, MD,
+ Constant::getNullValue(Type::getInt64Ty(I->getContext()))};
+ I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
+ }
+}
+
+Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
+ Instruction *&Temp) {
+ if (Opc != Instruction::BitCast)
+ return 0;
+
+ Temp = 0;
+ Type *SrcTy = V->getType();
+ if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
+ SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
+ LLVMContext &Context = V->getContext();
+
+ // We have no information about target data layout, so we assume that
+    // the maximum pointer size is 64 bits.
+ Type *MidTy = Type::getInt64Ty(Context);
+ Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
+
+ return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
+ }
+
+ return 0;
+}
+
+Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
+ if (Opc != Instruction::BitCast)
+ return 0;
+
+ Type *SrcTy = C->getType();
+ if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
+ SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
+ LLVMContext &Context = C->getContext();
+
+ // We have no information about target data layout, so we assume that
+    // the maximum pointer size is 64 bits.
+ Type *MidTy = Type::getInt64Ty(Context);
+
+ return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
+ DestTy);
+ }
+
+ return 0;
+}
+
+/// Check the debug info version number; if it is outdated, drop the debug
+/// info. Return true if the module was modified.
+bool llvm::UpgradeDebugInfo(Module &M) {
+ if (getDebugMetadataVersionFromModule(M) == DEBUG_METADATA_VERSION)
+ return false;
+
+ return StripDebugInfo(M);
+}
diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt
index c2a4ee3..581946c 100644
--- a/lib/IR/CMakeLists.txt
+++ b/lib/IR/CMakeLists.txt
@@ -6,10 +6,10 @@ add_llvm_library(LLVMCore
ConstantFold.cpp
Constants.cpp
Core.cpp
+ DIBuilder.cpp
DataLayout.cpp
DebugInfo.cpp
DebugLoc.cpp
- DIBuilder.cpp
Dominators.cpp
Function.cpp
GCOV.cpp
@@ -23,6 +23,7 @@ add_llvm_library(LLVMCore
LLVMContext.cpp
LLVMContextImpl.cpp
LeakDetector.cpp
+ LegacyPassManager.cpp
Metadata.cpp
Module.cpp
Pass.cpp
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 8c5a983..f5e225c 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -689,6 +689,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
}
case Instruction::BitCast:
return FoldBitCast(V, DestTy);
+ case Instruction::AddrSpaceCast:
+ return 0;
}
}
@@ -1897,6 +1899,37 @@ static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
return true;
}
+/// \brief Test whether a given ConstantInt is in-range for a SequentialType.
+static bool isIndexInRangeOfSequentialType(const SequentialType *STy,
+ const ConstantInt *CI) {
+ if (const PointerType *PTy = dyn_cast<PointerType>(STy))
+ // Only handle pointers to sized types, not pointers to functions.
+ return PTy->getElementType()->isSized();
+
+ uint64_t NumElements = 0;
+ // Determine the number of elements in our sequential type.
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
+ NumElements = ATy->getNumElements();
+ else if (const VectorType *VTy = dyn_cast<VectorType>(STy))
+ NumElements = VTy->getNumElements();
+
+ assert((isa<ArrayType>(STy) || NumElements > 0) &&
+ "didn't expect non-array type to have zero elements!");
+
+ // We cannot bounds check the index if it doesn't fit in an int64_t.
+ if (CI->getValue().getActiveBits() > 64)
+ return false;
+
+ // A negative index or an index past the end of our sequential type is
+ // considered out-of-range.
+ int64_t IndexVal = CI->getSExtValue();
+ if (IndexVal < 0 || (NumElements > 0 && (uint64_t)IndexVal >= NumElements))
+ return false;
+
+ // Otherwise, it is in-range.
+ return true;
+}
+
template<typename IndexTy>
static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
bool inBounds,
@@ -1940,7 +1973,32 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
I != E; ++I)
LastTy = *I;
- if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) {
+ // We cannot combine indices if doing so would take us outside of an
+ // array or vector. Doing otherwise could trick us if we evaluated such a
+ // GEP as part of a load.
+ //
+ // e.g. Consider if the original GEP was:
+ // i8* getelementptr ({ [2 x i8], i32, i8, [3 x i8] }* @main.c,
+ // i32 0, i32 0, i64 0)
+ //
+ // If we then tried to offset it by '8' to get to the third element,
+ // an i8, we should *not* get:
+ // i8* getelementptr ({ [2 x i8], i32, i8, [3 x i8] }* @main.c,
+ // i32 0, i32 0, i64 8)
+ //
+  // This GEP tries to index array element '8', which runs out of bounds.
+ // Subsequent evaluation would get confused and produce erroneous results.
+ //
+ // The following prohibits such a GEP from being formed by checking to see
+ // if the index is in-range with respect to an array or vector.
+ bool PerformFold = false;
+ if (Idx0->isNullValue())
+ PerformFold = true;
+ else if (SequentialType *STy = dyn_cast_or_null<SequentialType>(LastTy))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx0))
+ PerformFold = isIndexInRangeOfSequentialType(STy, CI);
+
+ if (PerformFold) {
SmallVector<Value*, 16> NewIndices;
NewIndices.reserve(Idxs.size() + CE->getNumOperands());
for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
@@ -2000,8 +2058,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
}
// Check to see if any array indices are not within the corresponding
- // notional array bounds. If so, try to determine if they can be factored
- // out into preceding dimensions.
+ // notional array or vector bounds. If so, try to determine if they can be
+ // factored out into preceding dimensions.
bool Unknown = false;
SmallVector<Constant *, 8> NewIdxs;
Type *Ty = C->getType();
@@ -2009,16 +2067,20 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
for (unsigned i = 0, e = Idxs.size(); i != e;
Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty))
- if (ATy->getNumElements() <= INT64_MAX &&
- ATy->getNumElements() != 0 &&
- CI->getSExtValue() >= (int64_t)ATy->getNumElements()) {
+ if (isa<ArrayType>(Ty) || isa<VectorType>(Ty))
+ if (CI->getSExtValue() > 0 &&
+ !isIndexInRangeOfSequentialType(cast<SequentialType>(Ty), CI)) {
if (isa<SequentialType>(Prev)) {
// It's out of range, but we can factor it into the prior
// dimension.
NewIdxs.resize(Idxs.size());
- ConstantInt *Factor = ConstantInt::get(CI->getType(),
- ATy->getNumElements());
+ uint64_t NumElements = 0;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ NumElements = ATy->getNumElements();
+ else
+ NumElements = cast<VectorType>(Ty)->getNumElements();
+
+ ConstantInt *Factor = ConstantInt::get(CI->getType(), NumElements);
NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 9067b34..690ac59 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -1126,6 +1126,7 @@ getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
@@ -1461,6 +1462,7 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
case Instruction::PtrToInt: return getPtrToInt(C, Ty);
case Instruction::IntToPtr: return getIntToPtr(C, Ty);
case Instruction::BitCast: return getBitCast(C, Ty);
+ case Instruction::AddrSpaceCast: return getAddrSpaceCast(C, Ty);
}
}
@@ -1489,10 +1491,26 @@ Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) {
if (Ty->isIntOrIntVectorTy())
return getPtrToInt(S, Ty);
+
+ unsigned SrcAS = S->getType()->getPointerAddressSpace();
+ if (Ty->isPtrOrPtrVectorTy() && SrcAS != Ty->getPointerAddressSpace())
+ return getAddrSpaceCast(S, Ty);
+
return getBitCast(S, Ty);
}
-Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty,
+Constant *ConstantExpr::getPointerBitCastOrAddrSpaceCast(Constant *S,
+ Type *Ty) {
+ assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
+ assert(Ty->isPtrOrPtrVectorTy() && "Invalid cast");
+
+ if (S->getType()->getPointerAddressSpace() != Ty->getPointerAddressSpace())
+ return getAddrSpaceCast(S, Ty);
+
+ return getBitCast(S, Ty);
+}
+
+Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty,
bool isSigned) {
assert(C->getType()->isIntOrIntVectorTy() &&
Ty->isIntOrIntVectorTy() && "Invalid cast");
@@ -1662,6 +1680,13 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) {
return getFoldedCast(Instruction::BitCast, C, DstTy);
}
+Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy) {
+ assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) &&
+ "Invalid constantexpr addrspacecast!");
+
+ return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy);
+}
+
Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
unsigned Flags) {
// Check the operands for consistency first.
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 66610bd..c70f459 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -97,7 +97,7 @@ LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) {
return wrap(new Module(ModuleID, getGlobalContext()));
}
-LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
+LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
LLVMContextRef C) {
return wrap(new Module(ModuleID, *unwrap(C)));
}
@@ -147,6 +147,16 @@ LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
return false;
}
+char *LLVMPrintModuleToString(LLVMModuleRef M) {
+ std::string buf;
+ raw_string_ostream os(buf);
+
+ unwrap(M)->print(os, NULL);
+ os.flush();
+
+ return strdup(buf.c_str());
+}
+
/*--.. Operations on inline assembler ......................................--*/
void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
unwrap(M)->setModuleInlineAsm(StringRef(Asm));
@@ -210,6 +220,20 @@ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
return wrap(&unwrap(Ty)->getContext());
}
+void LLVMDumpType(LLVMTypeRef Ty) {
+ return unwrap(Ty)->dump();
+}
+
+char *LLVMPrintTypeToString(LLVMTypeRef Ty) {
+ std::string buf;
+ raw_string_ostream os(buf);
+
+ unwrap(Ty)->print(os);
+ os.flush();
+
+ return strdup(buf.c_str());
+}
+
/*--.. Operations on integer types .........................................--*/
LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) {
@@ -450,6 +474,16 @@ void LLVMDumpValue(LLVMValueRef Val) {
unwrap(Val)->dump();
}
+char* LLVMPrintValueToString(LLVMValueRef Val) {
+ std::string buf;
+ raw_string_ostream os(buf);
+
+ unwrap(Val)->print(os);
+ os.flush();
+
+ return strdup(buf.c_str());
+}
+
void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
}
@@ -681,7 +715,7 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
return wrap(ConstantDataArray::getString(*unwrap(C), StringRef(Str, Length),
DontNullTerminate == 0));
}
-LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
LLVMValueRef *ConstantVals,
unsigned Count, LLVMBool Packed) {
Constant **Elements = unwrap<Constant>(ConstantVals, Count);
@@ -999,6 +1033,12 @@ LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
unwrap(ToType)));
}
+LLVMValueRef LLVMConstAddrSpaceCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getAddrSpaceCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
LLVMTypeRef ToType) {
return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
@@ -1110,8 +1150,6 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
return LLVMLinkOnceAnyLinkage;
case GlobalValue::LinkOnceODRLinkage:
return LLVMLinkOnceODRLinkage;
- case GlobalValue::LinkOnceODRAutoHideLinkage:
- return LLVMLinkOnceODRAutoHideLinkage;
case GlobalValue::WeakAnyLinkage:
return LLVMWeakAnyLinkage;
case GlobalValue::WeakODRLinkage:
@@ -1156,7 +1194,8 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
break;
case LLVMLinkOnceODRAutoHideLinkage:
- GV->setLinkage(GlobalValue::LinkOnceODRAutoHideLinkage);
+ DEBUG(errs() << "LLVMSetLinkage(): LLVMLinkOnceODRAutoHideLinkage is no "
+ "longer supported.");
break;
case LLVMWeakAnyLinkage:
GV->setLinkage(GlobalValue::WeakAnyLinkage);
@@ -1216,12 +1255,30 @@ void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) {
->setVisibility(static_cast<GlobalValue::VisibilityTypes>(Viz));
}
-unsigned LLVMGetAlignment(LLVMValueRef Global) {
- return unwrap<GlobalValue>(Global)->getAlignment();
+/*--.. Operations on global variables, load and store instructions .........--*/
+
+unsigned LLVMGetAlignment(LLVMValueRef V) {
+ Value *P = unwrap<Value>(V);
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(P))
+ return GV->getAlignment();
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->getAlignment();
+ if (StoreInst *SI = dyn_cast<StoreInst>(P))
+ return SI->getAlignment();
+
+ llvm_unreachable("only GlobalValue, LoadInst and StoreInst have alignment");
}
-void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) {
- unwrap<GlobalValue>(Global)->setAlignment(Bytes);
+void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) {
+ Value *P = unwrap<Value>(V);
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(P))
+ GV->setAlignment(Bytes);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ LI->setAlignment(Bytes);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(P))
+ SI->setAlignment(Bytes);
+ else
+ llvm_unreachable("only GlobalValue, LoadInst and StoreInst have alignment");
}
/*--.. Operations on global variables ......................................--*/
@@ -1553,7 +1610,7 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
return (LLVMAttribute)A->getParent()->getAttributes().
Raw(A->getArgNo()+1);
}
-
+
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
Argument *A = unwrap<Argument>(Arg);
@@ -1745,7 +1802,7 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
llvm_unreachable("LLVMSetInstructionCallConv applies only to call and invoke!");
}
-void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
+void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
AttrBuilder B(PA);
@@ -1755,7 +1812,7 @@ void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
index, B)));
}
-void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
+void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
AttrBuilder B(PA);
@@ -1765,7 +1822,7 @@ void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
index, B)));
}
-void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
AttrBuilder B;
@@ -2119,8 +2176,8 @@ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
- Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
- ITy, unwrap(Ty), AllocSize,
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
0, 0, "");
return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
}
@@ -2130,8 +2187,8 @@ LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
- Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
- ITy, unwrap(Ty), AllocSize,
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
unwrap(Val), 0, "");
return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
}
@@ -2157,7 +2214,7 @@ LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
}
-LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
+LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
LLVMValueRef PointerVal) {
return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal)));
}
@@ -2267,6 +2324,11 @@ LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val,
return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name));
}
+LLVMValueRef LLVMBuildAddrSpaceCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateAddrSpaceCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name) {
return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy),
@@ -2396,9 +2458,9 @@ LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
}
-LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
- LLVMValueRef PTR, LLVMValueRef Val,
- LLVMAtomicOrdering ordering,
+LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
+ LLVMValueRef PTR, LLVMValueRef Val,
+ LLVMAtomicOrdering ordering,
LLVMBool singleThread) {
AtomicRMWInst::BinOp intop;
switch (op) {
@@ -2421,14 +2483,14 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
case LLVMAtomicOrderingMonotonic: intordering = Monotonic; break;
case LLVMAtomicOrderingAcquire: intordering = Acquire; break;
case LLVMAtomicOrderingRelease: intordering = Release; break;
- case LLVMAtomicOrderingAcquireRelease:
- intordering = AcquireRelease;
+ case LLVMAtomicOrderingAcquireRelease:
+ intordering = AcquireRelease;
break;
- case LLVMAtomicOrderingSequentiallyConsistent:
- intordering = SequentiallyConsistent;
+ case LLVMAtomicOrderingSequentiallyConsistent:
+ intordering = SequentiallyConsistent;
break;
}
- return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val),
+ return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val),
intordering, singleThread ? SingleThread : CrossThread));
}
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 3005f77..c4a9f41 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -30,17 +30,24 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
}
DIBuilder::DIBuilder(Module &m)
- : M(m), VMContext(M.getContext()), TempEnumTypes(0),
- TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0),
- ValueFn(0)
-{}
+ : M(m), VMContext(M.getContext()), TempEnumTypes(0), TempRetainTypes(0),
+ TempSubprograms(0), TempGVs(0), DeclareFn(0), ValueFn(0) {}
/// finalize - Construct any deferred debug info descriptors.
void DIBuilder::finalize() {
DIArray Enums = getOrCreateArray(AllEnumTypes);
DIType(TempEnumTypes).replaceAllUsesWith(Enums);
- DIArray RetainTypes = getOrCreateArray(AllRetainTypes);
+ SmallVector<Value *, 16> RetainValues;
+ // Declarations and definitions of the same type may be retained. Some
+ // clients RAUW these pairs, leaving duplicates in the retained types
+ // list. Use a set to remove the duplicates while we transform the
+ // TrackingVHs back into Values.
+ SmallPtrSet<Value *, 16> RetainSet;
+ for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++)
+ if (RetainSet.insert(AllRetainTypes[I]))
+ RetainValues.push_back(AllRetainTypes[I]);
+ DIArray RetainTypes = getOrCreateArray(RetainValues);
DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
DIArray SPs = getOrCreateArray(AllSubprograms);
@@ -79,7 +86,7 @@ static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
assert(!Filename.empty() && "Unable to create file without name");
Value *Pair[] = {
MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory),
+ MDString::get(VMContext, Directory)
};
return MDNode::get(VMContext, Pair);
}
@@ -274,7 +281,7 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- FromTy
+ FromTy.getRef()
};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
@@ -294,7 +301,7 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- PointeeTy
+ PointeeTy.getRef()
};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
@@ -308,12 +315,12 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy,
NULL, // Unused
NULL,
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- PointeeTy,
- Base
+ PointeeTy.getRef(),
+ Base.getRef()
};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
@@ -333,7 +340,7 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- RTy
+ RTy.getRef()
};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
@@ -346,14 +353,14 @@ DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
File.getFileNode(),
- getNonCompileUnitScope(Context),
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- Ty
+ Ty.getRef()
};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
@@ -366,56 +373,59 @@ DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_friend),
NULL,
- Ty,
+ Ty.getRef(),
NULL, // Name
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- FriendTy
+ FriendTy.getRef()
};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
/// createInheritance - Create debugging information entry to establish
/// inheritance relationship between two types.
-DIDerivedType DIBuilder::createInheritance(
- DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) {
+DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
+ uint64_t BaseOffset,
+ unsigned Flags) {
assert(Ty.isType() && "Unable to create inheritance");
// TAG_inheritance is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
NULL,
- Ty,
+ Ty.getRef(),
NULL, // Name
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- BaseTy
+ BaseTy.getRef()
};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
/// createMemberType - Create debugging information entry for a member.
-DIDerivedType DIBuilder::createMemberType(
- DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, DIType Ty) {
+DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty) {
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
File.getFileNode(),
- getNonCompileUnitScope(Scope),
+ DIScope(getNonCompileUnitScope(Scope)).getRef(),
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty
+ Ty.getRef()
};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
@@ -432,14 +442,14 @@ DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name,
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
File.getFileNode(),
- getNonCompileUnitScope(Scope),
+ DIScope(getNonCompileUnitScope(Scope)).getRef(),
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0/*OffsetInBits*/),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty,
+ Ty.getRef(),
Val
};
return DIDerivedType(MDNode::get(VMContext, Elts));
@@ -448,13 +458,11 @@ DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name,
/// createObjCIVar - Create debugging information entry for Objective-C
/// instance variable.
DIDerivedType
-DIBuilder::createObjCIVar(StringRef Name,
- DIFile File, unsigned LineNumber,
+DIBuilder::createObjCIVar(StringRef Name, DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty, StringRef PropertyName,
- StringRef GetterName, StringRef SetterName,
- unsigned PropertyAttributes) {
+ uint64_t OffsetInBits, unsigned Flags, DIType Ty,
+ StringRef PropertyName, StringRef GetterName,
+ StringRef SetterName, unsigned PropertyAttributes) {
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
@@ -477,12 +485,12 @@ DIBuilder::createObjCIVar(StringRef Name,
/// createObjCIVar - Create debugging information entry for Objective-C
/// instance variable.
-DIDerivedType
-DIBuilder::createObjCIVar(StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty, MDNode *PropertyNode) {
+DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File,
+ unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, MDNode *PropertyNode) {
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
@@ -502,12 +510,10 @@ DIBuilder::createObjCIVar(StringRef Name,
/// createObjCProperty - Create debugging information entry for Objective-C
/// property.
-DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
- DIFile File, unsigned LineNumber,
- StringRef GetterName,
- StringRef SetterName,
- unsigned PropertyAttributes,
- DIType Ty) {
+DIObjCProperty
+DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber,
+ StringRef GetterName, StringRef SetterName,
+ unsigned PropertyAttributes, DIType Ty) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property),
MDString::get(VMContext, Name),
@@ -529,9 +535,9 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
unsigned ColumnNo) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
- getNonCompileUnitScope(Context),
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
MDString::get(VMContext, Name),
- Ty,
+ Ty.getRef(),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
@@ -547,9 +553,9 @@ DIBuilder::createTemplateValueParameter(unsigned Tag, DIDescriptor Context,
unsigned ColumnNo) {
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
- getNonCompileUnitScope(Context),
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
MDString::get(VMContext, Name),
- Ty,
+ Ty.getRef(),
Val,
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
@@ -598,30 +604,34 @@ DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
uint64_t OffsetInBits,
unsigned Flags, DIType DerivedFrom,
DIArray Elements,
- MDNode *VTableHolder,
- MDNode *TemplateParams) {
+ DIType VTableHolder,
+ MDNode *TemplateParams,
+ StringRef UniqueIdentifier) {
assert((!Context || Context.isScope() || Context.isType()) &&
"createClassType should be called with a valid Context");
// TAG_class_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
File.getFileNode(),
- getNonCompileUnitScope(Context),
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom,
+ DerivedFrom.getRef(),
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- VTableHolder,
- TemplateParams
+ VTableHolder.getRef(),
+ TemplateParams,
+ UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
};
DICompositeType R(MDNode::get(VMContext, Elts));
assert(R.isCompositeType() &&
"createClassType should return a DICompositeType");
+ if (!UniqueIdentifier.empty())
+ retainType(R);
return R;
}
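A rough sketch of how a frontend might use the new UniqueIdentifier parameter added above; DIB, CU and File are assumed to have been set up elsewhere, and the mangled-name string is purely illustrative:

// Sketch only: DIB is a DIBuilder, CU its compile unit, File a DIFile.
// "_ZTS3Foo" is a made-up identifier standing in for a mangled type name.
DICompositeType FooTy = DIB.createClassType(
    CU, "Foo", File, /*LineNumber=*/1,
    /*SizeInBits=*/64, /*AlignInBits=*/64, /*OffsetInBits=*/0, /*Flags=*/0,
    /*DerivedFrom=*/DIType(), DIB.getOrCreateArray(ArrayRef<Value *>()),
    /*VTableHolder=*/DIType(), /*TemplateParams=*/NULL,
    /*UniqueIdentifier=*/"_ZTS3Foo");
// With a non-empty identifier the type is also passed to retainType(), so it
// stays reachable even when every user refers to it only by its MDString.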
@@ -634,27 +644,31 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
unsigned Flags, DIType DerivedFrom,
DIArray Elements,
unsigned RunTimeLang,
- MDNode *VTableHolder) {
+ DIType VTableHolder,
+ StringRef UniqueIdentifier) {
// TAG_structure_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
File.getFileNode(),
- getNonCompileUnitScope(Context),
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom,
+ DerivedFrom.getRef(),
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
- VTableHolder,
+ VTableHolder.getRef(),
NULL,
+ UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
};
DICompositeType R(MDNode::get(VMContext, Elts));
assert(R.isCompositeType() &&
"createStructType should return a DICompositeType");
+ if (!UniqueIdentifier.empty())
+ retainType(R);
return R;
}
@@ -664,45 +678,52 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
uint64_t SizeInBits,
uint64_t AlignInBits, unsigned Flags,
DIArray Elements,
- unsigned RunTimeLang) {
+ unsigned RunTimeLang,
+ StringRef UniqueIdentifier) {
// TAG_union_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
File.getFileNode(),
- getNonCompileUnitScope(Scope),
+ DIScope(getNonCompileUnitScope(Scope)).getRef(),
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
NULL,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- NULL
+ NULL,
+ NULL,
+ UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
};
- return DICompositeType(MDNode::get(VMContext, Elts));
+ DICompositeType R(MDNode::get(VMContext, Elts));
+ if (!UniqueIdentifier.empty())
+ retainType(R);
+ return R;
}
/// createSubroutineType - Create subroutine type.
-DICompositeType
-DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
+DICompositeType DIBuilder::createSubroutineType(DIFile File,
+ DIArray ParameterTypes) {
// TAG_subroutine_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
MDString::get(VMContext, ""),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
NULL,
ParameterTypes,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
+ NULL,
+ NULL,
+ NULL // Type Identifier
};
return DICompositeType(MDNode::get(VMContext, Elts));
}
@@ -712,26 +733,30 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
DICompositeType DIBuilder::createEnumerationType(
DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
- DIType UnderlyingType) {
+ DIType UnderlyingType, StringRef UniqueIdentifier) {
// TAG_enumeration_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
File.getFileNode(),
- getNonCompileUnitScope(Scope),
+ DIScope(getNonCompileUnitScope(Scope)).getRef(),
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- UnderlyingType,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ UnderlyingType.getRef(),
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
+ NULL,
+ NULL,
+ UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
};
- MDNode *Node = MDNode::get(VMContext, Elts);
- AllEnumTypes.push_back(Node);
- return DICompositeType(Node);
+ DICompositeType CTy(MDNode::get(VMContext, Elts));
+ AllEnumTypes.push_back(CTy);
+ if (!UniqueIdentifier.empty())
+ retainType(CTy);
+ return CTy;
}
/// createArrayType - Create debugging information entry for an array.
@@ -743,15 +768,17 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
NULL, // Filename/Directory,
NULL, // Unused
MDString::get(VMContext, ""),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ Ty.getRef(),
Subscripts,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
+ NULL,
+ NULL,
+ NULL // Type Identifier
};
return DICompositeType(MDNode::get(VMContext, Elts));
}
@@ -759,22 +786,23 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
/// createVectorType - Create debugging information entry for a vector.
DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
DIType Ty, DIArray Subscripts) {
-
// A vector is an array type with the FlagVector flag applied.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
NULL, // Filename/Directory,
NULL, // Unused
MDString::get(VMContext, ""),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), DIType::FlagVector),
- Ty,
+ Ty.getRef(),
Subscripts,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
+ NULL,
+ NULL,
+ NULL // Type Identifier
};
return DICompositeType(MDNode::get(VMContext, Elts));
}
@@ -787,17 +815,14 @@ DIType DIBuilder::createArtificialType(DIType Ty) {
SmallVector<Value *, 9> Elts;
MDNode *N = Ty;
assert (N && "Unexpected input DIType!");
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (Value *V = N->getOperand(i))
- Elts.push_back(V);
- else
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
- }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Elts.push_back(N->getOperand(i));
unsigned CurFlags = Ty.getFlags();
CurFlags = CurFlags | DIType::FlagArtificial;
// Flags are stored at this slot.
+ // FIXME: Add an enum for this magic value.
Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
return DIType(MDNode::get(VMContext, Elts));
@@ -812,17 +837,14 @@ DIType DIBuilder::createObjectPointerType(DIType Ty) {
SmallVector<Value *, 9> Elts;
MDNode *N = Ty;
assert (N && "Unexpected input DIType!");
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (Value *V = N->getOperand(i))
- Elts.push_back(V);
- else
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
- }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Elts.push_back(N->getOperand(i));
unsigned CurFlags = Ty.getFlags();
CurFlags = CurFlags | (DIType::FlagObjectPointer | DIType::FlagArtificial);
// Flags are stored at this slot.
+ // FIXME: Add an enum for this magic value.
Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
return DIType(MDNode::get(VMContext, Elts));
@@ -831,7 +853,7 @@ DIType DIBuilder::createObjectPointerType(DIType Ty) {
/// retainType - Retain DIType in a module even if it is not referenced
/// through debug info anchors.
void DIBuilder::retainType(DIType T) {
- AllRetainTypes.push_back(T);
+ AllRetainTypes.push_back(TrackingVH<MDNode>(T));
}
/// createUnspecifiedParameter - Create unspecified type descriptor
@@ -845,31 +867,35 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() {
/// createForwardDecl - Create a temporary forward-declared type that
/// can be RAUW'd if the full type is seen.
-DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
- DIDescriptor Scope, DIFile F,
- unsigned Line, unsigned RuntimeLang,
- uint64_t SizeInBits,
- uint64_t AlignInBits) {
+DICompositeType
+DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
+ DIFile F, unsigned Line, unsigned RuntimeLang,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ StringRef UniqueIdentifier) {
// Create a temporary MDNode.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
F.getFileNode(),
- getNonCompileUnitScope(Scope),
+ DIScope(getNonCompileUnitScope(Scope)).getRef(),
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), Line),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext),
- DIDescriptor::FlagFwdDecl),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), DIDescriptor::FlagFwdDecl),
NULL,
DIArray(),
- ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
+ NULL,
+ NULL, // TemplateParams
+ UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
};
MDNode *Node = MDNode::getTemporary(VMContext, Elts);
- DIType RetTy(Node);
- assert(RetTy.isType() &&
+ DICompositeType RetTy(Node);
+ assert(RetTy.isCompositeType() &&
"createForwardDecl result should be a DIType");
+ if (!UniqueIdentifier.empty())
+ retainType(RetTy);
return RetTy;
}
@@ -895,10 +921,11 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
}
/// \brief Create a new descriptor for the specified global.
-DIGlobalVariable DIBuilder::
-createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F,
- unsigned LineNumber, DIType Ty, bool isLocalToUnit,
- Value *Val) {
+DIGlobalVariable DIBuilder::createGlobalVariable(StringRef Name,
+ StringRef LinkageName,
+ DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit,
+ Value *Val) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_variable),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -920,19 +947,22 @@ createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F,
}
/// \brief Create a new descriptor for the specified global.
-DIGlobalVariable DIBuilder::
-createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
- DIType Ty, bool isLocalToUnit, Value *Val) {
+DIGlobalVariable DIBuilder::createGlobalVariable(StringRef Name, DIFile F,
+ unsigned LineNumber, DIType Ty,
+ bool isLocalToUnit,
+ Value *Val) {
return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit,
Val);
}
/// createStaticVariable - Create a new descriptor for the specified static
/// variable.
-DIGlobalVariable DIBuilder::
-createStaticVariable(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile F, unsigned LineNumber,
- DIType Ty, bool isLocalToUnit, Value *Val, MDNode *Decl) {
+DIGlobalVariable DIBuilder::createStaticVariable(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit,
+ Value *Val, MDNode *Decl) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_variable),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -1012,24 +1042,38 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
}
/// createFunction - Create a new descriptor for the specified function.
-DISubprogram DIBuilder::createFunction(DIDescriptor Context,
- StringRef Name,
- StringRef LinkageName,
- DIFile File, unsigned LineNo,
- DICompositeType Ty,
+/// FIXME: this is added for dragonegg. Once we update dragonegg
+/// to call the resolve function, this will be removed.
+DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
+ StringRef LinkageName, DIFile File,
+ unsigned LineNo, DICompositeType Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine, unsigned Flags,
+ bool isOptimized, Function *Fn,
+ MDNode *TParams, MDNode *Decl) {
+ // dragonegg does not generate identifiers for types, so using an empty map
+ // to resolve the context should be fine.
+ DITypeIdentifierMap EmptyMap;
+ return createFunction(Context.resolve(EmptyMap), Name, LinkageName, File,
+ LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine,
+ Flags, isOptimized, Fn, TParams, Decl);
+}
+
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile File,
+ unsigned LineNo, DICompositeType Ty,
bool isLocalToUnit, bool isDefinition,
- unsigned ScopeLine,
- unsigned Flags, bool isOptimized,
- Function *Fn,
- MDNode *TParams,
- MDNode *Decl) {
+ unsigned ScopeLine, unsigned Flags,
+ bool isOptimized, Function *Fn,
+ MDNode *TParams, MDNode *Decl) {
assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
"function types should be subroutines");
Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
File.getFileNode(),
- getNonCompileUnitScope(Context),
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
@@ -1059,26 +1103,24 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
}
/// createMethod - Create a new descriptor for the specified C++ method.
-DISubprogram DIBuilder::createMethod(DIDescriptor Context,
- StringRef Name,
- StringRef LinkageName,
- DIFile F,
+DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F,
unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit,
- bool isDefinition,
+ bool isLocalToUnit, bool isDefinition,
unsigned VK, unsigned VIndex,
- MDNode *VTableHolder,
- unsigned Flags,
- bool isOptimized,
- Function *Fn,
+ DIType VTableHolder, unsigned Flags,
+ bool isOptimized, Function *Fn,
MDNode *TParam) {
assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
"function types should be subroutines");
+ assert(getNonCompileUnitScope(Context) &&
+ "Methods should have a non-null context that is not the compile unit.");
Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
F.getFileNode(),
- getNonCompileUnitScope(Context),
+ DIScope(Context).getRef(),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
@@ -1086,9 +1128,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
Ty,
ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
- ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+ ConstantInt::get(Type::getInt32Ty(VMContext), VK),
ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
- VTableHolder,
+ VTableHolder.getRef(),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn,
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index d786d33..6bdc09e 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -629,6 +629,13 @@ Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const
return 0;
}
+unsigned DataLayout::getLargestLegalIntTypeSize() const {
+ unsigned MaxWidth = 0;
+ for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+ MaxWidth = std::max<unsigned>(MaxWidth, LegalIntWidths[i]);
+ return MaxWidth;
+}
+
uint64_t DataLayout::getIndexedOffset(Type *ptrTy,
ArrayRef<Value *> Indices) const {
Type *Ty = ptrTy;
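A small usage sketch for the new getLargestLegalIntTypeSize() accessor; the layout string below is illustrative rather than tied to any real target:

// Sketch: the "n8:16:32:64" spec declares the native integer widths, and the
// new accessor simply reports the largest one declared (here 64).
DataLayout DL("e-p:64:64:64-n8:16:32:64");
unsigned MaxWidth = DL.getLargestLegalIntTypeSize();
// A layout string with no "n" spec would make this return 0.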
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index ff37542..70a756f 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -75,8 +75,8 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- if (ConstantInt *CI
- = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+ if (ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
return CI->getZExtValue();
return 0;
@@ -87,8 +87,8 @@ int64_t DIDescriptor::getInt64Field(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- if (ConstantInt *CI
- = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+ if (ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
return CI->getSExtValue();
return 0;
@@ -104,7 +104,7 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
+ return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
return 0;
}
@@ -113,7 +113,7 @@ Constant *DIDescriptor::getConstantField(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+ return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
return 0;
}
@@ -122,7 +122,7 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+ return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
return 0;
}
@@ -131,19 +131,17 @@ void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
return;
if (Elt < DbgNode->getNumOperands()) {
- MDNode *Node = const_cast<MDNode*>(DbgNode);
+ MDNode *Node = const_cast<MDNode *>(DbgNode);
Node->replaceOperandWith(Elt, F);
}
}
unsigned DIVariable::getNumAddrElements() const {
- return DbgNode->getNumOperands()-8;
+ return DbgNode->getNumOperands() - 8;
}
/// getInlinedAt - If this variable is inlined then return inline location.
-MDNode *DIVariable::getInlinedAt() const {
- return getNodeField(DbgNode, 7);
-}
+MDNode *DIVariable::getInlinedAt() const { return getNodeField(DbgNode, 7); }
//===----------------------------------------------------------------------===//
// Predicates
@@ -152,7 +150,8 @@ MDNode *DIVariable::getInlinedAt() const {
/// isBasicType - Return true if the specified tag is legal for
/// DIBasicType.
bool DIDescriptor::isBasicType() const {
- if (!DbgNode) return false;
+ if (!DbgNode)
+ return false;
switch (getTag()) {
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_unspecified_type:
@@ -164,7 +163,8 @@ bool DIDescriptor::isBasicType() const {
/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
bool DIDescriptor::isDerivedType() const {
- if (!DbgNode) return false;
+ if (!DbgNode)
+ return false;
switch (getTag()) {
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_pointer_type:
@@ -187,7 +187,8 @@ bool DIDescriptor::isDerivedType() const {
/// isCompositeType - Return true if the specified tag is legal for
/// DICompositeType.
bool DIDescriptor::isCompositeType() const {
- if (!DbgNode) return false;
+ if (!DbgNode)
+ return false;
switch (getTag()) {
case dwarf::DW_TAG_array_type:
case dwarf::DW_TAG_structure_type:
@@ -203,7 +204,8 @@ bool DIDescriptor::isCompositeType() const {
/// isVariable - Return true if the specified tag is legal for DIVariable.
bool DIDescriptor::isVariable() const {
- if (!DbgNode) return false;
+ if (!DbgNode)
+ return false;
switch (getTag()) {
case dwarf::DW_TAG_auto_variable:
case dwarf::DW_TAG_arg_variable:
@@ -240,17 +242,19 @@ bool DIDescriptor::isUnspecifiedParameter() const {
/// isScope - Return true if the specified tag is one of the scope
/// related tag.
bool DIDescriptor::isScope() const {
- if (!DbgNode) return false;
+ if (!DbgNode)
+ return false;
switch (getTag()) {
case dwarf::DW_TAG_compile_unit:
case dwarf::DW_TAG_lexical_block:
case dwarf::DW_TAG_subprogram:
case dwarf::DW_TAG_namespace:
+ case dwarf::DW_TAG_file_type:
return true;
default:
break;
}
- return false;
+ return isType();
}
/// isTemplateTypeParameter - Return true if the specified tag is
@@ -286,13 +290,13 @@ bool DIDescriptor::isNameSpace() const {
/// lexical block with an extra file.
bool DIDescriptor::isLexicalBlockFile() const {
return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- (DbgNode->getNumOperands() == 3);
+ (DbgNode->getNumOperands() == 3);
}
/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
bool DIDescriptor::isLexicalBlock() const {
return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- (DbgNode->getNumOperands() > 3);
+ (DbgNode->getNumOperands() > 3);
}
/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
@@ -339,10 +343,10 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) {
// this detail by allowing a value to be replaced with replaceAllUsesWith()
// itself.
if (DbgNode != D) {
- MDNode *Node = const_cast<MDNode*>(DbgNode);
+ MDNode *Node = const_cast<MDNode *>(DbgNode);
const MDNode *DN = D;
const Value *V = cast_or_null<Value>(DN);
- Node->replaceAllUsesWith(const_cast<Value*>(V));
+ Node->replaceAllUsesWith(const_cast<Value *>(V));
MDNode::deleteTemporary(Node);
}
}
@@ -359,31 +363,14 @@ void DIType::replaceAllUsesWith(MDNode *D) {
// this detail by allowing a value to be replaced with replaceAllUsesWith()
// itself.
if (DbgNode != D) {
- MDNode *Node = const_cast<MDNode*>(DbgNode);
+ MDNode *Node = const_cast<MDNode *>(DbgNode);
const MDNode *DN = D;
const Value *V = cast_or_null<Value>(DN);
- Node->replaceAllUsesWith(const_cast<Value*>(V));
+ Node->replaceAllUsesWith(const_cast<Value *>(V));
MDNode::deleteTemporary(Node);
}
}
-/// isUnsignedDIType - Return true if type encoding is unsigned.
-bool DIType::isUnsignedDIType() {
- DIDerivedType DTy(DbgNode);
- if (DTy.Verify())
- return DTy.getTypeDerivedFrom().isUnsignedDIType();
-
- DIBasicType BTy(DbgNode);
- if (BTy.Verify()) {
- unsigned Encoding = BTy.getEncoding();
- if (Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_boolean)
- return true;
- }
- return false;
-}
-
/// Verify - Verify that a compile unit is well formed.
bool DICompileUnit::Verify() const {
if (!isCompileUnit())
@@ -413,22 +400,53 @@ static bool fieldIsMDNode(const MDNode *DbgNode, unsigned Elt) {
// FIXME: This function should return true if the field is null or the field
// is indeed an MDNode: return !Fld || isa<MDNode>(Fld).
Value *Fld = getField(DbgNode, Elt);
- if (Fld && isa<MDString>(Fld) &&
- !cast<MDString>(Fld)->getString().empty())
+ if (Fld && isa<MDString>(Fld) && !cast<MDString>(Fld)->getString().empty())
return false;
return true;
}
+/// Check if a field at position Elt of an MDNode is an MDString.
+static bool fieldIsMDString(const MDNode *DbgNode, unsigned Elt) {
+ Value *Fld = getField(DbgNode, Elt);
+ return !Fld || isa<MDString>(Fld);
+}
+
+/// Check if a value can be a reference to a type.
+static bool isTypeRef(const Value *Val) {
+ return !Val ||
+ (isa<MDString>(Val) && !cast<MDString>(Val)->getString().empty()) ||
+ (isa<MDNode>(Val) && DIType(cast<MDNode>(Val)).isType());
+}
+
+/// Check if a field at position Elt of an MDNode can be a reference to a type.
+static bool fieldIsTypeRef(const MDNode *DbgNode, unsigned Elt) {
+ Value *Fld = getField(DbgNode, Elt);
+ return isTypeRef(Fld);
+}
+
+/// Check if a value can be a ScopeRef.
+static bool isScopeRef(const Value *Val) {
+ return !Val ||
+ (isa<MDString>(Val) && !cast<MDString>(Val)->getString().empty()) ||
+ (isa<MDNode>(Val) && DIScope(cast<MDNode>(Val)).isScope());
+}
+
+/// Check if a field at position Elt of an MDNode can be a ScopeRef.
+static bool fieldIsScopeRef(const MDNode *DbgNode, unsigned Elt) {
+ Value *Fld = getField(DbgNode, Elt);
+ return isScopeRef(Fld);
+}
+
/// Verify - Verify that a type descriptor is well formed.
bool DIType::Verify() const {
if (!isType())
return false;
// Make sure Context @ field 2 is MDNode.
- if (!fieldIsMDNode(DbgNode, 2))
+ if (!fieldIsScopeRef(DbgNode, 2))
return false;
// FIXME: Sink this into the various subclass verifies.
- unsigned Tag = getTag();
+ uint16_t Tag = getTag();
if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
Tag != dwarf::DW_TAG_ptr_to_member_type &&
@@ -460,12 +478,12 @@ bool DIBasicType::Verify() const {
/// Verify - Verify that a derived type descriptor is well formed.
bool DIDerivedType::Verify() const {
- // Make sure DerivedFrom @ field 9 is MDNode.
- if (!fieldIsMDNode(DbgNode, 9))
+ // Make sure DerivedFrom @ field 9 is TypeRef.
+ if (!fieldIsTypeRef(DbgNode, 9))
return false;
if (getTag() == dwarf::DW_TAG_ptr_to_member_type)
- // Make sure ClassType @ field 10 is MDNode.
- if (!fieldIsMDNode(DbgNode, 10))
+ // Make sure ClassType @ field 10 is a TypeRef.
+ if (!fieldIsTypeRef(DbgNode, 10))
return false;
return isDerivedType() && DbgNode->getNumOperands() >= 10 &&
@@ -477,13 +495,17 @@ bool DICompositeType::Verify() const {
if (!isCompositeType())
return false;
- // Make sure DerivedFrom @ field 9 and ContainingType @ field 12 are MDNodes.
- if (!fieldIsMDNode(DbgNode, 9))
+ // Make sure DerivedFrom @ field 9 and ContainingType @ field 12 are TypeRef.
+ if (!fieldIsTypeRef(DbgNode, 9))
return false;
- if (!fieldIsMDNode(DbgNode, 12))
+ if (!fieldIsTypeRef(DbgNode, 12))
return false;
- return DbgNode->getNumOperands() >= 10 && DbgNode->getNumOperands() <= 14;
+ // Make sure the type identifier at field 14 is an MDString; it can be null.
+ if (!fieldIsMDString(DbgNode, 14))
+ return false;
+
+ return DbgNode->getNumOperands() == 15;
}
/// Verify - Verify that a subprogram descriptor is well formed.
@@ -491,13 +513,13 @@ bool DISubprogram::Verify() const {
if (!isSubprogram())
return false;
- // Make sure context @ field 2 and type @ field 7 are MDNodes.
- if (!fieldIsMDNode(DbgNode, 2))
+ // Make sure context @ field 2 is a ScopeRef and type @ field 7 is an MDNode.
+ if (!fieldIsScopeRef(DbgNode, 2))
return false;
if (!fieldIsMDNode(DbgNode, 7))
return false;
// Containing type @ field 12.
- if (!fieldIsMDNode(DbgNode, 12))
+ if (!fieldIsTypeRef(DbgNode, 12))
return false;
return DbgNode->getNumOperands() == 20;
}
@@ -550,9 +572,7 @@ bool DINameSpace::Verify() const {
}
/// \brief Retrieve the MDNode for the directory/file pair.
-MDNode *DIFile::getFileNode() const {
- return getNodeField(DbgNode, 1);
-}
+MDNode *DIFile::getFileNode() const { return getNodeField(DbgNode, 1); }
/// \brief Verify that the file descriptor is well formed.
bool DIFile::Verify() const {
@@ -595,56 +615,77 @@ bool DIImportedEntity::Verify() const {
(DbgNode->getNumOperands() == 4 || DbgNode->getNumOperands() == 5);
}
-/// getOriginalTypeSize - If this type is derived from a base type then
-/// return base type size.
-uint64_t DIDerivedType::getOriginalTypeSize() const {
- unsigned Tag = getTag();
-
- if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
- Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
- Tag != dwarf::DW_TAG_restrict_type)
- return getSizeInBits();
-
- DIType BaseType = getTypeDerivedFrom();
-
- // If this type is not derived from any type then take conservative approach.
- if (!BaseType.isValid())
- return getSizeInBits();
-
- // If this is a derived type, go ahead and get the base type, unless it's a
- // reference then it's just the size of the field. Pointer types have no need
- // of this since they're a different type of qualification on the type.
- if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
- BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
- return getSizeInBits();
-
- if (BaseType.isDerivedType())
- return DIDerivedType(BaseType).getOriginalTypeSize();
-
- return BaseType.getSizeInBits();
-}
-
/// getObjCProperty - Return property node, if this ivar is associated with one.
MDNode *DIDerivedType::getObjCProperty() const {
return getNodeField(DbgNode, 10);
}
+MDString *DICompositeType::getIdentifier() const {
+ return cast_or_null<MDString>(getField(DbgNode, 14));
+}
+
+#ifndef NDEBUG
+static void VerifySubsetOf(const MDNode *LHS, const MDNode *RHS) {
+ for (unsigned i = 0; i != LHS->getNumOperands(); ++i) {
+ // Skip the 'empty' list (that's a single i32 0, rather than truly empty).
+ if (i == 0 && isa<ConstantInt>(LHS->getOperand(i)))
+ continue;
+ const MDNode *E = cast<MDNode>(LHS->getOperand(i));
+ bool found = false;
+ for (unsigned j = 0; !found && j != RHS->getNumOperands(); ++j)
+ found = E == RHS->getOperand(j);
+ assert(found && "Losing a member during member list replacement");
+ }
+}
+#endif
+
/// \brief Set the array of member DITypes.
void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) {
- assert((!TParams || DbgNode->getNumOperands() == 14) &&
+ assert((!TParams || DbgNode->getNumOperands() == 15) &&
"If you're setting the template parameters this should include a slot "
"for that!");
TrackingVH<MDNode> N(*this);
- N->replaceOperandWith(10, Elements);
+ if (Elements) {
+#ifndef NDEBUG
+ // Check that the new list of members contains all the old members as well.
+ if (const MDNode *El = cast_or_null<MDNode>(N->getOperand(10)))
+ VerifySubsetOf(El, Elements);
+#endif
+ N->replaceOperandWith(10, Elements);
+ }
if (TParams)
N->replaceOperandWith(13, TParams);
DbgNode = N;
}
+void DICompositeType::addMember(DIDescriptor D) {
+ SmallVector<llvm::Value *, 16> M;
+ DIArray OrigM = getTypeArray();
+ unsigned Elements = OrigM.getNumElements();
+ if (Elements == 1 && !OrigM.getElement(0))
+ Elements = 0;
+ M.reserve(Elements + 1);
+ for (unsigned i = 0; i != Elements; ++i)
+ M.push_back(OrigM.getElement(i));
+ M.push_back(D);
+ setTypeArray(DIArray(MDNode::get(DbgNode->getContext(), M)));
+}
+
+/// Generate a reference to this DIScope. Uses the type identifier instead
+/// of the actual MDNode if possible, to help type uniquing.
+DIScopeRef DIScope::getRef() const {
+ if (!isCompositeType())
+ return DIScopeRef(*this);
+ DICompositeType DTy(DbgNode);
+ if (!DTy.getIdentifier())
+ return DIScopeRef(*this);
+ return DIScopeRef(DTy.getIdentifier());
+}
+
/// \brief Set the containing type.
void DICompositeType::setContainingType(DICompositeType ContainingType) {
TrackingVH<MDNode> N(*this);
- N->replaceOperandWith(12, ContainingType);
+ N->replaceOperandWith(12, ContainingType.getRef());
DbgNode = N;
}
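The pattern behind these reference-based operands, sketched for clarity; Ty is assumed to be a composite type node created through DIBuilder:

// Sketch: getRef() prefers the MDString identifier over the node itself.
DIScopeRef Ref = DIScope(Ty).getRef();
// If Ty carries a type identifier (operand 14), Ref wraps that MDString and
// must later be resolved through a DITypeIdentifierMap; otherwise it simply
// wraps the defining MDNode. setContainingType and the DIBuilder create*
// methods above store exactly this kind of reference in the relevant slots.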
@@ -674,7 +715,7 @@ bool DISubprogram::describes(const Function *F) {
}
unsigned DISubprogram::isOptimized() const {
- assert (DbgNode && "Invalid subprogram descriptor!");
+ assert(DbgNode && "Invalid subprogram descriptor!");
if (DbgNode->getNumOperands() == 15)
return getUnsignedField(14);
return 0;
@@ -694,25 +735,39 @@ Value *DITemplateValueParameter::getValue() const {
// If the current node has a parent scope then return that,
// else return an empty scope.
-DIScope DIScope::getContext() const {
+DIScopeRef DIScope::getContext() const {
if (isType())
return DIType(DbgNode).getContext();
if (isSubprogram())
- return DISubprogram(DbgNode).getContext();
+ return DIScopeRef(DISubprogram(DbgNode).getContext());
if (isLexicalBlock())
- return DILexicalBlock(DbgNode).getContext();
+ return DIScopeRef(DILexicalBlock(DbgNode).getContext());
if (isLexicalBlockFile())
- return DILexicalBlockFile(DbgNode).getContext();
+ return DIScopeRef(DILexicalBlockFile(DbgNode).getContext());
if (isNameSpace())
- return DINameSpace(DbgNode).getContext();
+ return DIScopeRef(DINameSpace(DbgNode).getContext());
assert((isFile() || isCompileUnit()) && "Unhandled type of scope.");
- return DIScope();
+ return DIScopeRef(NULL);
+}
+
+// If the scope node has a name, return that, else return an empty string.
+StringRef DIScope::getName() const {
+ if (isType())
+ return DIType(DbgNode).getName();
+ if (isSubprogram())
+ return DISubprogram(DbgNode).getName();
+ if (isNameSpace())
+ return DINameSpace(DbgNode).getName();
+ assert((isLexicalBlock() || isLexicalBlockFile() || isFile() ||
+ isCompileUnit()) &&
+ "Unhandled type of scope.");
+ return StringRef();
}
StringRef DIScope::getFilename() const {
@@ -748,7 +803,6 @@ DIArray DICompileUnit::getSubprograms() const {
return DIArray(getNodeField(DbgNode, 9));
}
-
DIArray DICompileUnit::getGlobalVariables() const {
if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
@@ -813,8 +867,7 @@ DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
SmallVector<Value *, 16> Elts;
// Insert inlined scope as 7th element.
for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
- i == 7 ? Elts.push_back(InlinedScope) :
- Elts.push_back(DV->getOperand(i));
+ i == 7 ? Elts.push_back(InlinedScope) : Elts.push_back(DV->getOperand(i));
return DIVariable(MDNode::get(VMContext, Elts));
}
@@ -823,9 +876,8 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
SmallVector<Value *, 16> Elts;
// Insert inlined scope as 7th element.
for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
- i == 7 ?
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))):
- Elts.push_back(DV->getOperand(i));
+ i == 7 ? Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)))
+ : Elts.push_back(DV->getOperand(i));
return DIVariable(MDNode::get(VMContext, Elts));
}
@@ -849,23 +901,42 @@ DICompositeType llvm::getDICompositeType(DIType T) {
if (T.isCompositeType())
return DICompositeType(T);
- if (T.isDerivedType())
- return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+ if (T.isDerivedType()) {
+ // This function is currently used by dragonegg and dragonegg does
+ // not generate identifier for types, so using an empty map to resolve
+ // not generate identifiers for types, so using an empty map to resolve
+ DITypeIdentifierMap EmptyMap;
+ return getDICompositeType(
+ DIDerivedType(T).getTypeDerivedFrom().resolve(EmptyMap));
+ }
return DICompositeType();
}
-/// isSubprogramContext - Return true if Context is either a subprogram
-/// or another context nested inside a subprogram.
-bool llvm::isSubprogramContext(const MDNode *Context) {
- if (!Context)
- return false;
- DIDescriptor D(Context);
- if (D.isSubprogram())
- return true;
- if (D.isType())
- return isSubprogramContext(DIType(Context).getContext());
- return false;
+/// Update DITypeIdentifierMap by going through retained types of each CU.
+DITypeIdentifierMap
+llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) {
+ DITypeIdentifierMap Map;
+ for (unsigned CUi = 0, CUe = CU_Nodes->getNumOperands(); CUi != CUe; ++CUi) {
+ DICompileUnit CU(CU_Nodes->getOperand(CUi));
+ DIArray Retain = CU.getRetainedTypes();
+ for (unsigned Ti = 0, Te = Retain.getNumElements(); Ti != Te; ++Ti) {
+ if (!Retain.getElement(Ti).isCompositeType())
+ continue;
+ DICompositeType Ty(Retain.getElement(Ti));
+ if (MDString *TypeId = Ty.getIdentifier()) {
+ // Definition has priority over declaration.
+ // Try to insert (TypeId, Ty) to Map.
+ std::pair<DITypeIdentifierMap::iterator, bool> P =
+ Map.insert(std::make_pair(TypeId, Ty));
+ // If TypeId already exists in Map and this is a definition, replace
+ // whatever we had (declaration or definition) with the definition.
+ if (!P.second && !Ty.isForwardDecl())
+ P.first->second = Ty;
+ }
+ }
+ }
+ return Map;
}
//===----------------------------------------------------------------------===//
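A sketch of how a consumer builds and queries the identifier map; M is assumed to be a Module that already carries llvm.dbg.cu metadata, and DTy a DIDerivedType taken from it:

// Sketch: build the map once per module, then resolve string-based refs.
DITypeIdentifierMap TypeIdentifierMap;
if (NamedMDNode *CUNodes = M.getNamedMetadata("llvm.dbg.cu"))
  TypeIdentifierMap = generateDITypeIdentifierMap(CUNodes);
// An MDString reference is looked up in the map; an MDNode reference is
// returned unchanged, so resolve() is safe to call on either form.
DIType BaseTy = DTy.getTypeDerivedFrom().resolve(TypeIdentifierMap);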
@@ -879,10 +950,21 @@ void DebugInfoFinder::reset() {
TYs.clear();
Scopes.clear();
NodesSeen.clear();
+ TypeIdentifierMap.clear();
+ TypeMapInitialized = false;
+}
+
+void DebugInfoFinder::InitializeTypeMap(const Module &M) {
+ if (!TypeMapInitialized)
+ if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
+ TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes);
+ TypeMapInitialized = true;
+ }
}
/// processModule - Process entire module and collect debug info.
void DebugInfoFinder::processModule(const Module &M) {
+ InitializeTypeMap(M);
if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
@@ -904,27 +986,38 @@ void DebugInfoFinder::processModule(const Module &M) {
DIArray RetainedTypes = CU.getRetainedTypes();
for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
processType(DIType(RetainedTypes.getElement(i)));
- // FIXME: We really shouldn't be bailing out after visiting just one CU
- return;
+ DIArray Imports = CU.getImportedEntities();
+ for (unsigned i = 0, e = Imports.getNumElements(); i != e; ++i) {
+ DIImportedEntity Import = DIImportedEntity(Imports.getElement(i));
+ DIDescriptor Entity = Import.getEntity();
+ if (Entity.isType())
+ processType(DIType(Entity));
+ else if (Entity.isSubprogram())
+ processSubprogram(DISubprogram(Entity));
+ else if (Entity.isNameSpace())
+ processScope(DINameSpace(Entity).getContext());
+ }
}
}
}
/// processLocation - Process DILocation.
-void DebugInfoFinder::processLocation(DILocation Loc) {
- if (!Loc) return;
+void DebugInfoFinder::processLocation(const Module &M, DILocation Loc) {
+ if (!Loc)
+ return;
+ InitializeTypeMap(M);
processScope(Loc.getScope());
- processLocation(Loc.getOrigLocation());
+ processLocation(M, Loc.getOrigLocation());
}
/// processType - Process DIType.
void DebugInfoFinder::processType(DIType DT) {
if (!addType(DT))
return;
- processScope(DT.getContext());
+ processScope(DT.getContext().resolve(TypeIdentifierMap));
if (DT.isCompositeType()) {
DICompositeType DCT(DT);
- processType(DCT.getTypeDerivedFrom());
+ processType(DCT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
DIArray DA = DCT.getTypeArray();
for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
DIDescriptor D = DA.getElement(i);
@@ -935,7 +1028,7 @@ void DebugInfoFinder::processType(DIType DT) {
}
} else if (DT.isDerivedType()) {
DIDerivedType DDT(DT);
- processType(DDT.getTypeDerivedFrom());
+ processType(DDT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
}
}
@@ -975,8 +1068,7 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
else if (Context.isLexicalBlockFile()) {
DILexicalBlockFile DBF = DILexicalBlockFile(Context);
return processLexicalBlock(DILexicalBlock(DBF.getScope()));
- }
- else
+ } else
return processSubprogram(DISubprogram(Context));
}
@@ -984,14 +1076,30 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
void DebugInfoFinder::processSubprogram(DISubprogram SP) {
if (!addSubprogram(SP))
return;
- processScope(SP.getContext());
+ processScope(SP.getContext().resolve(TypeIdentifierMap));
processType(SP.getType());
+ DIArray TParams = SP.getTemplateParams();
+ for (unsigned I = 0, E = TParams.getNumElements(); I != E; ++I) {
+ DIDescriptor Element = TParams.getElement(I);
+ if (Element.isTemplateTypeParameter()) {
+ DITemplateTypeParameter TType(Element);
+ processScope(TType.getContext().resolve(TypeIdentifierMap));
+ processType(TType.getType().resolve(TypeIdentifierMap));
+ } else if (Element.isTemplateValueParameter()) {
+ DITemplateValueParameter TVal(Element);
+ processScope(TVal.getContext().resolve(TypeIdentifierMap));
+ processType(TVal.getType().resolve(TypeIdentifierMap));
+ }
+ }
}
/// processDeclare - Process DbgDeclareInst.
-void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) {
+void DebugInfoFinder::processDeclare(const Module &M,
+ const DbgDeclareInst *DDI) {
MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
- if (!N) return;
+ if (!N)
+ return;
+ InitializeTypeMap(M);
DIDescriptor DV(N);
if (!DV.isVariable())
@@ -1003,9 +1111,11 @@ void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) {
processType(DIVariable(N).getType());
}
-void DebugInfoFinder::processValue(const DbgValueInst *DVI) {
+void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
MDNode *N = dyn_cast<MDNode>(DVI->getVariable());
- if (!N) return;
+ if (!N)
+ return;
+ InitializeTypeMap(M);
DIDescriptor DV(N);
if (!DV.isVariable())
@@ -1083,12 +1193,14 @@ bool DebugInfoFinder::addScope(DIScope Scope) {
/// dump - Print descriptor to dbgs() with a newline.
void DIDescriptor::dump() const {
- print(dbgs()); dbgs() << '\n';
+ print(dbgs());
+ dbgs() << '\n';
}
/// print - Print descriptor.
void DIDescriptor::print(raw_ostream &OS) const {
- if (!DbgNode) return;
+ if (!DbgNode)
+ return;
if (const char *Tag = dwarf::TagString(getTag()))
OS << "[ " << Tag << " ]";
@@ -1150,7 +1262,8 @@ void DIEnumerator::printInternal(raw_ostream &OS) const {
}
void DIType::printInternal(raw_ostream &OS) const {
- if (!DbgNode) return;
+ if (!DbgNode)
+ return;
StringRef Res = getName();
if (!Res.empty())
@@ -1158,13 +1271,11 @@ void DIType::printInternal(raw_ostream &OS) const {
// TODO: Print context?
- OS << " [line " << getLineNumber()
- << ", size " << getSizeInBits()
- << ", align " << getAlignInBits()
- << ", offset " << getOffsetInBits();
+ OS << " [line " << getLineNumber() << ", size " << getSizeInBits()
+ << ", align " << getAlignInBits() << ", offset " << getOffsetInBits();
if (isBasicType())
if (const char *Enc =
- dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
+ dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
OS << ", enc " << Enc;
OS << "]";
@@ -1260,16 +1371,15 @@ void DIObjCProperty::printInternal(raw_ostream &OS) const {
if (!Name.empty())
OS << " [" << Name << ']';
- OS << " [line " << getLineNumber()
- << ", properties " << getUnsignedField(6) << ']';
+ OS << " [line " << getLineNumber() << ", properties " << getUnsignedField(6)
+ << ']';
}
static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
const LLVMContext &Ctx) {
- if (!DL.isUnknown()) { // Print source line info.
+ if (!DL.isUnknown()) { // Print source line info.
DIScope Scope(DL.getScope(Ctx));
- assert(Scope.isScope() &&
- "Scope of a DebugLoc should be a DIScope.");
+ assert(Scope.isScope() && "Scope of a DebugLoc should be a DIScope.");
// Omit the directory, because it's likely to be long and uninteresting.
CommentOS << Scope.getFilename();
CommentOS << ':' << DL.getLine();
@@ -1298,3 +1408,81 @@ void DIVariable::printExtendedName(raw_ostream &OS) const {
}
}
}
+
+/// Specialize constructor to make sure it has the correct type.
+template <> DIRef<DIScope>::DIRef(const Value *V) : Val(V) {
+ assert(isScopeRef(V) && "DIScopeRef should be a MDString or MDNode");
+}
+template <> DIRef<DIType>::DIRef(const Value *V) : Val(V) {
+ assert(isTypeRef(V) && "DITypeRef should be a MDString or MDNode");
+}
+
+/// Specialize getFieldAs to handle fields that are references to DIScopes.
+template <>
+DIScopeRef DIDescriptor::getFieldAs<DIScopeRef>(unsigned Elt) const {
+ return DIScopeRef(getField(DbgNode, Elt));
+}
+/// Specialize getFieldAs to handle fields that are references to DITypes.
+template <> DITypeRef DIDescriptor::getFieldAs<DITypeRef>(unsigned Elt) const {
+ return DITypeRef(getField(DbgNode, Elt));
+}
+
+/// Strip debug info in the module if it exists.
+/// To do this, we remove all calls to the debugger intrinsics and any named
+/// metadata for debugging. We also remove debug locations for instructions.
+/// Return true if module is modified.
+bool llvm::StripDebugInfo(Module &M) {
+
+ bool Changed = false;
+
+ // Remove all of the calls to the debugger intrinsics, and remove them from
+ // the module.
+ if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
+ while (!Declare->use_empty()) {
+ CallInst *CI = cast<CallInst>(Declare->use_back());
+ CI->eraseFromParent();
+ }
+ Declare->eraseFromParent();
+ Changed = true;
+ }
+
+ if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
+ while (!DbgVal->use_empty()) {
+ CallInst *CI = cast<CallInst>(DbgVal->use_back());
+ CI->eraseFromParent();
+ }
+ DbgVal->eraseFromParent();
+ Changed = true;
+ }
+
+ for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+ NME = M.named_metadata_end(); NMI != NME;) {
+ NamedMDNode *NMD = NMI;
+ ++NMI;
+ if (NMD->getName().startswith("llvm.dbg.")) {
+ NMD->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+ ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI) {
+ if (!BI->getDebugLoc().isUnknown()) {
+ Changed = true;
+ BI->setDebugLoc(DebugLoc());
+ }
+ }
+
+ return Changed;
+}
+
+/// Return Debug Info Metadata Version by checking module flags.
+unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
+ Value *Val = M.getModuleFlag("Debug Info Version");
+ if (!Val)
+ return 0;
+ return cast<ConstantInt>(Val)->getZExtValue();
+}
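The two new module-level helpers are typically paired by passes that want to drop stale debug info; a rough sketch, where SupportedVersion is a placeholder for whatever metadata version the reader understands:

// Sketch: strip debug info when the recorded version is absent or too old.
static bool maybeStripDebugInfo(Module &M, unsigned SupportedVersion) {
  if (getDebugMetadataVersionFromModule(M) < SupportedVersion)
    return StripDebugInfo(M); // drops dbg intrinsics, llvm.dbg.* named MD,
                              // and per-instruction debug locations
  return false;
}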
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index bf9d949..e8a2402 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -276,6 +276,9 @@ void Function::dropAllReferences() {
// blockaddresses, but BasicBlock's destructor takes care of those.
while (!BasicBlocks.empty())
BasicBlocks.begin()->eraseFromParent();
+
+ // Prefix data is stored in a side table.
+ setPrefixData(0);
}
void Function::addAttribute(unsigned i, Attribute::AttrKind attr) {
@@ -351,6 +354,10 @@ void Function::copyAttributesFrom(const GlobalValue *Src) {
setGC(SrcF->getGC());
else
clearGC();
+ if (SrcF->hasPrefixData())
+ setPrefixData(SrcF->getPrefixData());
+ else
+ setPrefixData(0);
}
/// getIntrinsicID - This method returns the ID number of the specified
@@ -446,7 +453,9 @@ enum IIT_Info {
IIT_STRUCT5 = 22,
IIT_EXTEND_VEC_ARG = 23,
IIT_TRUNC_VEC_ARG = 24,
- IIT_ANYPTR = 25
+ IIT_ANYPTR = 25,
+ IIT_V1 = 26,
+ IIT_VARARG = 27
};
@@ -460,6 +469,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_Done:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Void, 0));
return;
+ case IIT_VARARG:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::VarArg, 0));
+ return;
case IIT_MMX:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0));
return;
@@ -490,6 +502,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_I64:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
return;
+ case IIT_V1:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
case IIT_V2:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2));
DecodeIITType(NextElt, Infos, OutputTable);
@@ -601,6 +617,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
switch (D.Kind) {
case IITDescriptor::Void: return Type::getVoidTy(Context);
+ case IITDescriptor::VarArg: return Type::getVoidTy(Context);
case IITDescriptor::MMX: return Type::getX86_MMXTy(Context);
case IITDescriptor::Metadata: return Type::getMetadataTy(Context);
case IITDescriptor::Half: return Type::getHalfTy(Context);
@@ -720,3 +737,32 @@ bool Function::callsFunctionThatReturnsTwice() const {
return false;
}
+
+Constant *Function::getPrefixData() const {
+ assert(hasPrefixData());
+ const LLVMContextImpl::PrefixDataMapTy &PDMap =
+ getContext().pImpl->PrefixDataMap;
+ assert(PDMap.find(this) != PDMap.end());
+ return cast<Constant>(PDMap.find(this)->second->getReturnValue());
+}
+
+void Function::setPrefixData(Constant *PrefixData) {
+ if (!PrefixData && !hasPrefixData())
+ return;
+
+ unsigned SCData = getSubclassDataFromValue();
+ LLVMContextImpl::PrefixDataMapTy &PDMap = getContext().pImpl->PrefixDataMap;
+ ReturnInst *&PDHolder = PDMap[this];
+ if (PrefixData) {
+ if (PDHolder)
+ PDHolder->setOperand(0, PrefixData);
+ else
+ PDHolder = ReturnInst::Create(getContext(), PrefixData);
+ SCData |= 2;
+ } else {
+ delete PDHolder;
+ PDMap.erase(this);
+ SCData &= ~2;
+ }
+ setValueSubclassData(SCData);
+}
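Because prefix data lives in a side table keyed by the Function rather than in the Function object itself, dropAllReferences and copyAttributesFrom above have to manage it explicitly. A small usage sketch, with an arbitrary constant standing in for real prefix data and F assumed to be a Function*:

// Sketch: attach a 32-bit constant as prefix data, then read it back.
LLVMContext &Ctx = F->getContext();
F->setPrefixData(ConstantInt::get(Type::getInt32Ty(Ctx), 0xDEADBEEF));
if (F->hasPrefixData()) {
  Constant *PD = F->getPrefixData(); // the ConstantInt installed above
  (void)PD;
}
// Passing a null Constant clears the entry from the side table again:
F->setPrefixData(0);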
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index e9baa5c..f0f8c7d 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -7,14 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// GCOV implements the interface to read and write coverage files that use
+// GCOV implements the interface to read and write coverage files that use
// 'gcov' format.
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Debug.h"
#include "llvm/Support/GCOV.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
@@ -43,25 +45,45 @@ bool GCOVFile::read(GCOVBuffer &Buffer) {
if (Format == GCOV::InvalidGCOV)
return false;
- unsigned i = 0;
- while (1) {
- GCOVFunction *GFun = NULL;
- if (isGCDAFile(Format)) {
- // Use existing function while reading .gcda file.
- assert(i < Functions.size() && ".gcda data does not match .gcno data");
- GFun = Functions[i];
- } else if (isGCNOFile(Format)){
- GFun = new GCOVFunction();
+ if (isGCNOFile(Format)) {
+ while (true) {
+ if (!Buffer.readFunctionTag()) break;
+ GCOVFunction *GFun = new GCOVFunction();
+ if (!GFun->read(Buffer, Format))
+ return false;
Functions.push_back(GFun);
}
- if (!GFun || !GFun->read(Buffer, Format))
- break;
- ++i;
}
+ else if (isGCDAFile(Format)) {
+ for (size_t i = 0, e = Functions.size(); i < e; ++i) {
+ if (!Buffer.readFunctionTag()) {
+ errs() << "Unexpected number of functions.\n";
+ return false;
+ }
+ if (!Functions[i]->read(Buffer, Format))
+ return false;
+ }
+ if (Buffer.readObjectTag()) {
+ uint32_t Length;
+ uint32_t Dummy;
+ if (!Buffer.readInt(Length)) return false;
+ if (!Buffer.readInt(Dummy)) return false; // checksum
+ if (!Buffer.readInt(Dummy)) return false; // num
+ if (!Buffer.readInt(RunCount)) return false;
+ Buffer.advanceCursor(Length-3);
+ }
+ while (Buffer.readProgramTag()) {
+ uint32_t Length;
+ if (!Buffer.readInt(Length)) return false;
+ Buffer.advanceCursor(Length);
+ ++ProgramCount;
+ }
+ }
+
return true;
}
-/// dump - Dump GCOVFile content on standard out for debugging purposes.
+/// dump - Dump GCOVFile content to dbgs() for debugging purposes.
void GCOVFile::dump() {
for (SmallVectorImpl<GCOVFunction *>::iterator I = Functions.begin(),
E = Functions.end(); I != E; ++I)
@@ -72,9 +94,10 @@ void GCOVFile::dump() {
/// reading .gcno and .gcda files.
void GCOVFile::collectLineCounts(FileInfo &FI) {
for (SmallVectorImpl<GCOVFunction *>::iterator I = Functions.begin(),
- E = Functions.end(); I != E; ++I)
+ E = Functions.end(); I != E; ++I)
(*I)->collectLineCounts(FI);
- FI.print();
+ FI.setRunCount(RunCount);
+ FI.setProgramCount(ProgramCount);
}
//===----------------------------------------------------------------------===//
@@ -85,76 +108,121 @@ GCOVFunction::~GCOVFunction() {
DeleteContainerPointers(Blocks);
}
-/// read - Read a aunction from the buffer. Return false if buffer cursor
+/// read - Read a function from the buffer. Return false if buffer cursor
/// does not point to a function tag.
bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) {
- if (!Buff.readFunctionTag())
- return false;
-
- Buff.readInt(); // Function header length
- Ident = Buff.readInt();
- Buff.readInt(); // Checksum #1
+ uint32_t Dummy;
+ if (!Buff.readInt(Dummy)) return false; // Function header length
+ if (!Buff.readInt(Ident)) return false;
+ if (!Buff.readInt(Dummy)) return false; // Checksum #1
if (Format != GCOV::GCNO_402 && Format != GCOV::GCDA_402)
- Buff.readInt(); // Checksum #2
+ if (!Buff.readInt(Dummy)) return false; // Checksum #2
+
+ if (!Buff.readString(Name)) return false;
- Name = Buff.readString();
if (Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404)
- Filename = Buff.readString();
+ if (!Buff.readString(Filename)) return false;
if (Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404) {
- Buff.readArcTag();
- uint32_t Count = Buff.readInt() / 2;
- for (unsigned i = 0, e = Count; i != e; ++i) {
- Blocks[i]->addCount(Buff.readInt64());
+ if (!Buff.readArcTag()) {
+ errs() << "Arc tag not found.\n";
+ return false;
+ }
+ uint32_t Count;
+ if (!Buff.readInt(Count)) return false;
+ Count /= 2;
+
+ // This for loop adds the counts for each block. A second nested loop is
+ // required to combine the edge counts that are contained in the GCDA file.
+ for (uint32_t Line = 0; Count > 0; ++Line) {
+ if (Line >= Blocks.size()) {
+ errs() << "Unexpected number of edges.\n";
+ return false;
+ }
+ GCOVBlock &Block = *Blocks[Line];
+ for (size_t Edge = 0, End = Block.getNumEdges(); Edge < End; ++Edge) {
+ if (Count == 0) {
+ errs() << "Unexpected number of edges.\n";
+ return false;
+ }
+ uint64_t ArcCount;
+ if (!Buff.readInt64(ArcCount)) return false;
+ Block.addCount(ArcCount);
+ --Count;
+ }
}
return true;
}
- LineNumber = Buff.readInt();
+ if (!Buff.readInt(LineNumber)) return false;
// read blocks.
- bool BlockTagFound = Buff.readBlockTag();
- (void)BlockTagFound;
- assert(BlockTagFound && "Block Tag not found!");
- uint32_t BlockCount = Buff.readInt();
- for (int i = 0, e = BlockCount; i != e; ++i) {
- Buff.readInt(); // Block flags;
- Blocks.push_back(new GCOVBlock(i));
+ if (!Buff.readBlockTag()) {
+ errs() << "Block tag not found.\n";
+ return false;
+ }
+ uint32_t BlockCount;
+ if (!Buff.readInt(BlockCount)) return false;
+ for (uint32_t i = 0, e = BlockCount; i != e; ++i) {
+ if (!Buff.readInt(Dummy)) return false; // Block flags;
+ Blocks.push_back(new GCOVBlock(*this, i));
}
// read edges.
while (Buff.readEdgeTag()) {
- uint32_t EdgeCount = (Buff.readInt() - 1) / 2;
- uint32_t BlockNo = Buff.readInt();
- assert(BlockNo < BlockCount && "Unexpected Block number!");
- for (int i = 0, e = EdgeCount; i != e; ++i) {
- Blocks[BlockNo]->addEdge(Buff.readInt());
- Buff.readInt(); // Edge flag
+ uint32_t EdgeCount;
+ if (!Buff.readInt(EdgeCount)) return false;
+ EdgeCount = (EdgeCount - 1) / 2;
+ uint32_t BlockNo;
+ if (!Buff.readInt(BlockNo)) return false;
+ if (BlockNo >= BlockCount) {
+ errs() << "Unexpected block number.\n";
+ return false;
+ }
+ for (uint32_t i = 0, e = EdgeCount; i != e; ++i) {
+ uint32_t Dst;
+ if (!Buff.readInt(Dst)) return false;
+ Blocks[BlockNo]->addEdge(Dst);
+ if (!Buff.readInt(Dummy)) return false; // Edge flag
}
}
// read line table.
while (Buff.readLineTag()) {
- uint32_t LineTableLength = Buff.readInt();
- uint32_t Size = Buff.getCursor() + LineTableLength*4;
- uint32_t BlockNo = Buff.readInt();
- assert(BlockNo < BlockCount && "Unexpected Block number!");
+ uint32_t LineTableLength;
+ if (!Buff.readInt(LineTableLength)) return false;
+ uint32_t EndPos = Buff.getCursor() + LineTableLength*4;
+ uint32_t BlockNo;
+ if (!Buff.readInt(BlockNo)) return false;
+ if (BlockNo >= BlockCount) {
+ errs() << "Unexpected block number.\n";
+ return false;
+ }
GCOVBlock *Block = Blocks[BlockNo];
- Buff.readInt(); // flag
- while (Buff.getCursor() != (Size - 4)) {
- StringRef Filename = Buff.readString();
- if (Buff.getCursor() == (Size - 4)) break;
- while (uint32_t L = Buff.readInt())
- Block->addLine(Filename, L);
+ if (!Buff.readInt(Dummy)) return false; // flag
+ while (Buff.getCursor() != (EndPos - 4)) {
+ StringRef F;
+ if (!Buff.readString(F)) return false;
+ if (F != Filename) {
+ errs() << "Multiple sources for a single basic block.\n";
+ return false;
+ }
+ if (Buff.getCursor() == (EndPos - 4)) break;
+ while (true) {
+ uint32_t Line;
+ if (!Buff.readInt(Line)) return false;
+ if (!Line) break;
+ Block->addLine(Line);
+ }
}
- Buff.readInt(); // flag
+ if (!Buff.readInt(Dummy)) return false; // flag
}
return true;
}
-/// dump - Dump GCOVFunction content on standard out for debugging purposes.
+/// dump - Dump GCOVFunction content to dbgs() for debugging purposes.
void GCOVFunction::dump() {
- outs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n";
+ dbgs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n";
for (SmallVectorImpl<GCOVBlock *>::iterator I = Blocks.begin(),
E = Blocks.end(); I != E; ++I)
(*I)->dump();
@@ -174,110 +242,73 @@ void GCOVFunction::collectLineCounts(FileInfo &FI) {
/// ~GCOVBlock - Delete GCOVBlock and its content.
GCOVBlock::~GCOVBlock() {
Edges.clear();
- DeleteContainerSeconds(Lines);
-}
-
-void GCOVBlock::addLine(StringRef Filename, uint32_t LineNo) {
- GCOVLines *&LinesForFile = Lines[Filename];
- if (!LinesForFile)
- LinesForFile = new GCOVLines();
- LinesForFile->add(LineNo);
+ Lines.clear();
}
/// collectLineCounts - Collect line counts. This must be used after
/// reading .gcno and .gcda files.
void GCOVBlock::collectLineCounts(FileInfo &FI) {
- for (StringMap<GCOVLines *>::iterator I = Lines.begin(),
+ for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
E = Lines.end(); I != E; ++I)
- I->second->collectLineCounts(FI, I->first(), Counter);
+ FI.addLineCount(Parent.getFilename(), *I, Counter);
}
-/// dump - Dump GCOVBlock content on standard out for debugging purposes.
+/// dump - Dump GCOVBlock content to dbgs() for debugging purposes.
void GCOVBlock::dump() {
- outs() << "Block : " << Number << " Counter : " << Counter << "\n";
+ dbgs() << "Block : " << Number << " Counter : " << Counter << "\n";
if (!Edges.empty()) {
- outs() << "\tEdges : ";
+ dbgs() << "\tEdges : ";
for (SmallVectorImpl<uint32_t>::iterator I = Edges.begin(), E = Edges.end();
I != E; ++I)
- outs() << (*I) << ",";
- outs() << "\n";
+ dbgs() << (*I) << ",";
+ dbgs() << "\n";
}
if (!Lines.empty()) {
- outs() << "\tLines : ";
- for (StringMap<GCOVLines *>::iterator LI = Lines.begin(),
- LE = Lines.end(); LI != LE; ++LI) {
- outs() << LI->first() << " -> ";
- LI->second->dump();
- outs() << "\n";
- }
+ dbgs() << "\tLines : ";
+ for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
+ E = Lines.end(); I != E; ++I)
+ dbgs() << (*I) << ",";
+ dbgs() << "\n";
}
}
//===----------------------------------------------------------------------===//
-// GCOVLines implementation.
-
-/// collectLineCounts - Collect line counts. This must be used after
-/// reading .gcno and .gcda files.
-void GCOVLines::collectLineCounts(FileInfo &FI, StringRef Filename,
- uint32_t Count) {
- for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
- E = Lines.end(); I != E; ++I)
- FI.addLineCount(Filename, *I, Count);
-}
-
-/// dump - Dump GCOVLines content on standard out for debugging purposes.
-void GCOVLines::dump() {
- for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
- E = Lines.end(); I != E; ++I)
- outs() << (*I) << ",";
-}
-
-//===----------------------------------------------------------------------===//
// FileInfo implementation.
-/// addLineCount - Add line count for the given line number in a file.
-void FileInfo::addLineCount(StringRef Filename, uint32_t Line, uint32_t Count) {
- if (LineInfo.find(Filename) == LineInfo.end()) {
- OwningPtr<MemoryBuffer> Buff;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
- errs() << Filename << ": " << ec.message() << "\n";
- return;
- }
- StringRef AllLines = Buff.take()->getBuffer();
- LineCounts L(AllLines.count('\n')+2);
- L[Line-1] = Count;
- LineInfo[Filename] = L;
- return;
- }
- LineCounts &L = LineInfo[Filename];
- L[Line-1] = Count;
-}
-
/// print - Print source files with collected line count information.
-void FileInfo::print() {
+void FileInfo::print(raw_fd_ostream &OS, StringRef gcnoFile,
+ StringRef gcdaFile) {
for (StringMap<LineCounts>::iterator I = LineInfo.begin(), E = LineInfo.end();
I != E; ++I) {
StringRef Filename = I->first();
- outs() << Filename << "\n";
+ OS << " -: 0:Source:" << Filename << "\n";
+ OS << " -: 0:Graph:" << gcnoFile << "\n";
+ OS << " -: 0:Data:" << gcdaFile << "\n";
+ OS << " -: 0:Runs:" << RunCount << "\n";
+ OS << " -: 0:Programs:" << ProgramCount << "\n";
LineCounts &L = LineInfo[Filename];
OwningPtr<MemoryBuffer> Buff;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
errs() << Filename << ": " << ec.message() << "\n";
return;
}
- StringRef AllLines = Buff.take()->getBuffer();
- for (unsigned i = 0, e = L.size(); i != e; ++i) {
- if (L[i])
- outs() << L[i] << ":\t";
- else
- outs() << " :\t";
+ StringRef AllLines = Buff->getBuffer();
+ uint32_t i = 0;
+ while (!AllLines.empty()) {
+ if (L.find(i) != L.end()) {
+ if (L[i] == 0)
+ OS << " #####:";
+ else
+ OS << format("%9" PRIu64 ":", L[i]);
+ } else {
+ OS << " -:";
+ }
std::pair<StringRef, StringRef> P = AllLines.split('\n');
if (AllLines != P.first)
- outs() << P.first;
- outs() << "\n";
+ OS << format("%5u:", i+1) << P.first;
+ OS << "\n";
AllLines = P.second;
+ ++i;
}
}
}
-
-
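The reworked FileInfo::print above emits gcov-style annotated source (the " -: 0:Source:" preamble followed by per-line counts). A minimal driver sketch, assuming the GCOVFile/GCOVBuffer/FileInfo interfaces exactly as they appear in this patch; the helper name and its parameters are illustrative, modeled on how llvm-cov wires these classes together:

// Hypothetical driver: read a .gcno/.gcda pair and write annotated source to OS.
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/GCOV.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static bool annotateCoverage(StringRef GCNO, StringRef GCDA,
                             raw_fd_ostream &OS) {
  OwningPtr<MemoryBuffer> NotesBuf, DataBuf;
  if (MemoryBuffer::getFileOrSTDIN(GCNO, NotesBuf) ||
      MemoryBuffer::getFileOrSTDIN(GCDA, DataBuf))
    return false;                        // could not open one of the files

  GCOVFile GF;
  GCOVBuffer Notes(NotesBuf.get()), Data(DataBuf.get());
  if (!GF.read(Notes) || !GF.read(Data)) // .gcno first, then .gcda
    return false;

  FileInfo FI;
  GF.collectLineCounts(FI);              // also forwards RunCount/ProgramCount
  FI.print(OS, GCNO, GCDA);              // emits the " -: 0:..." preamble + counts
  return true;
}

Reading the .gcno buffer first matters: the .gcda path shown at the top of this hunk walks the already-populated Functions vector, so the graph must be built before the counts are applied.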
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index 6d547f3..da3b02a 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -229,14 +229,14 @@ void GlobalAlias::setAliasee(Constant *Aliasee) {
setOperand(0, Aliasee);
}
-const GlobalValue *GlobalAlias::getAliasedGlobal() const {
- const Constant *C = getAliasee();
+GlobalValue *GlobalAlias::getAliasedGlobal() {
+ Constant *C = getAliasee();
if (C == 0) return 0;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
return GV;
- const ConstantExpr *CE = cast<ConstantExpr>(C);
+ ConstantExpr *CE = cast<ConstantExpr>(C);
assert((CE->getOpcode() == Instruction::BitCast ||
CE->getOpcode() == Instruction::GetElementPtr) &&
"Unsupported aliasee");
@@ -244,18 +244,18 @@ const GlobalValue *GlobalAlias::getAliasedGlobal() const {
return cast<GlobalValue>(CE->getOperand(0));
}
-const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
- SmallPtrSet<const GlobalValue*, 3> Visited;
+GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) {
+ SmallPtrSet<GlobalValue*, 3> Visited;
// Check if we need to stop early.
if (stopOnWeak && mayBeOverridden())
return this;
- const GlobalValue *GV = getAliasedGlobal();
+ GlobalValue *GV = getAliasedGlobal();
Visited.insert(GV);
// Iterate over aliasing chain, stopping on weak alias if necessary.
- while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
+ while (GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
if (stopOnWeak && GA->mayBeOverridden())
break;
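Dropping the const qualifiers lets callers mutate the global reached through an alias chain without a const_cast. A small sketch under that assumption; the helper and the visibility tweak are illustrative, not part of this patch:

#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
using namespace llvm;

// Follow the alias chain to the underlying global, stopping at weak aliases,
// and mutate the target in place (the old const API required a const_cast).
static void hideAliasTarget(GlobalAlias *GA) {
  if (GlobalValue *Target = GA->resolveAliasedGlobal(/*stopOnWeak=*/true))
    Target->setVisibility(GlobalValue::HiddenVisibility);
}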
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 2b5a0b3..a7773c4 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -223,18 +223,19 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case GetElementPtr: return "getelementptr";
// Convert instructions...
- case Trunc: return "trunc";
- case ZExt: return "zext";
- case SExt: return "sext";
- case FPTrunc: return "fptrunc";
- case FPExt: return "fpext";
- case FPToUI: return "fptoui";
- case FPToSI: return "fptosi";
- case UIToFP: return "uitofp";
- case SIToFP: return "sitofp";
- case IntToPtr: return "inttoptr";
- case PtrToInt: return "ptrtoint";
- case BitCast: return "bitcast";
+ case Trunc: return "trunc";
+ case ZExt: return "zext";
+ case SExt: return "sext";
+ case FPTrunc: return "fptrunc";
+ case FPExt: return "fpext";
+ case FPToUI: return "fptoui";
+ case FPToSI: return "fptosi";
+ case UIToFP: return "uitofp";
+ case SIToFP: return "sitofp";
+ case IntToPtr: return "inttoptr";
+ case PtrToInt: return "ptrtoint";
+ case BitCast: return "bitcast";
+ case AddrSpaceCast: return "addrspacecast";
// Other instructions...
case ICmp: return "icmp";
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 205cb43..8a6b77b 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -2095,7 +2095,9 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
case Instruction::SIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI:
- return false; // These always modify bits
+ case Instruction::AddrSpaceCast:
+    // TODO: Target information may give a more accurate answer here.
+ return false;
case Instruction::BitCast:
return true; // BitCast never modifies bits.
case Instruction::PtrToInt:
@@ -2137,44 +2139,46 @@ unsigned CastInst::isEliminableCastPair(
// ZEXT < Integral Unsigned Integer Any
// SEXT < Integral Signed Integer Any
// FPTOUI n/a FloatPt n/a Integral Unsigned
- // FPTOSI n/a FloatPt n/a Integral Signed
- // UITOFP n/a Integral Unsigned FloatPt n/a
- // SITOFP n/a Integral Signed FloatPt n/a
- // FPTRUNC > FloatPt n/a FloatPt n/a
- // FPEXT < FloatPt n/a FloatPt n/a
+ // FPTOSI n/a FloatPt n/a Integral Signed
+ // UITOFP n/a Integral Unsigned FloatPt n/a
+ // SITOFP n/a Integral Signed FloatPt n/a
+ // FPTRUNC > FloatPt n/a FloatPt n/a
+ // FPEXT < FloatPt n/a FloatPt n/a
// PTRTOINT n/a Pointer n/a Integral Unsigned
// INTTOPTR n/a Integral Unsigned Pointer n/a
- // BITCAST = FirstClass n/a FirstClass n/a
+ // BITCAST = FirstClass n/a FirstClass n/a
+ // ADDRSPCST n/a Pointer n/a Pointer n/a
//
// NOTE: some transforms are safe, but we consider them to be non-profitable.
// For example, we could merge "fptoui double to i32" + "zext i32 to i64",
// into "fptoui double to i64", but this loses information about the range
- // of the produced value (we no longer know the top-part is all zeros).
+ // of the produced value (we no longer know the top-part is all zeros).
// Further this conversion is often much more expensive for typical hardware,
- // and causes issues when building libgcc. We disallow fptosi+sext for the
+ // and causes issues when building libgcc. We disallow fptosi+sext for the
// same reason.
- const unsigned numCastOps =
+ const unsigned numCastOps =
Instruction::CastOpsEnd - Instruction::CastOpsBegin;
static const uint8_t CastResults[numCastOps][numCastOps] = {
- // T F F U S F F P I B -+
- // R Z S P P I I T P 2 N T |
- // U E E 2 2 2 2 R E I T C +- secondOp
- // N X X U S F F N X N 2 V |
- // C T T I I P P C T T P T -+
- { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // Trunc -+
- { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3 }, // ZExt |
- { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3 }, // SExt |
- { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToUI |
- { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToSI |
- { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // UIToFP +- firstOp
- { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // SIToFP |
- { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4 }, // FPTrunc |
- { 99,99,99, 2, 2,99,99,10, 2,99,99, 4 }, // FPExt |
- { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3 }, // PtrToInt |
- { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr |
- { 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+
+ // T F F U S F F P I B A -+
+ // R Z S P P I I T P 2 N T S |
+ // U E E 2 2 2 2 R E I T C C +- secondOp
+ // N X X U S F F N X N 2 V V |
+ // C T T I I P P C T T P T T -+
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // Trunc -+
+ { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3, 0}, // ZExt |
+ { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3, 0}, // SExt |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToUI |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToSI |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // UIToFP +- firstOp
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // SIToFP |
+ { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4, 0}, // FPTrunc |
+ { 99,99,99, 2, 2,99,99,10, 2,99,99, 4, 0}, // FPExt |
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3, 0}, // PtrToInt |
+ { 99,99,99,99,99,99,99,99,99,11,99,15, 0}, // IntToPtr |
+ { 5, 5, 5, 6, 6, 5, 5, 6, 6,16, 5, 1,14}, // BitCast |
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
};
-
+
// If either of the casts are a bitcast from scalar to vector, disallow the
// merging. However, bitcast of A->B->A are allowed.
bool isFirstBitcast = (firstOp == Instruction::BitCast);
@@ -2191,47 +2195,50 @@ unsigned CastInst::isEliminableCastPair(
[secondOp-Instruction::CastOpsBegin];
switch (ElimCase) {
case 0:
- // categorically disallowed
+ // Categorically disallowed.
return 0;
case 1:
- // allowed, use first cast's opcode
+ // Allowed, use first cast's opcode.
return firstOp;
case 2:
- // allowed, use second cast's opcode
+ // Allowed, use second cast's opcode.
return secondOp;
case 3:
- // no-op cast in second op implies firstOp as long as the DestTy
+ // No-op cast in second op implies firstOp as long as the DestTy
// is integer and we are not converting between a vector and a
// non vector type.
if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
return firstOp;
return 0;
case 4:
- // no-op cast in second op implies firstOp as long as the DestTy
+ // No-op cast in second op implies firstOp as long as the DestTy
// is floating point.
if (DstTy->isFloatingPointTy())
return firstOp;
return 0;
case 5:
- // no-op cast in first op implies secondOp as long as the SrcTy
+ // No-op cast in first op implies secondOp as long as the SrcTy
// is an integer.
if (SrcTy->isIntegerTy())
return secondOp;
return 0;
case 6:
- // no-op cast in first op implies secondOp as long as the SrcTy
+ // No-op cast in first op implies secondOp as long as the SrcTy
// is a floating point.
if (SrcTy->isFloatingPointTy())
return secondOp;
return 0;
case 7: {
+ // Cannot simplify if address spaces are different!
+ if (SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace())
+ return 0;
+
unsigned MidSize = MidTy->getScalarSizeInBits();
- // Check the address spaces first. If we know they are in the same address
- // space, the pointer sizes must be the same so we can still fold this
- // without knowing the actual sizes as long we know that the intermediate
- // pointer is the largest possible pointer size.
- if (MidSize == 64 &&
- SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace())
+ // We can still fold this without knowing the actual sizes as long we
+ // know that the intermediate pointer is the largest possible
+ // pointer size.
+ // FIXME: Is this always true?
+ if (MidSize == 64)
return Instruction::BitCast;
// ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size.
@@ -2254,7 +2261,8 @@ unsigned CastInst::isEliminableCastPair(
return firstOp;
return secondOp;
}
- case 9: // zext, sext -> zext, because sext can't sign extend after zext
+ case 9:
+ // zext, sext -> zext, because sext can't sign extend after zext
return Instruction::ZExt;
case 10:
// fpext followed by ftrunc is allowed if the bit size returned to is
@@ -2263,46 +2271,6 @@ unsigned CastInst::isEliminableCastPair(
return Instruction::BitCast;
return 0; // If the types are not the same we can't eliminate it.
case 11: {
- // bitcast followed by ptrtoint is allowed as long as the bitcast is a
- // pointer to pointer cast, and the pointers are the same size.
- PointerType *SrcPtrTy = dyn_cast<PointerType>(SrcTy);
- PointerType *MidPtrTy = dyn_cast<PointerType>(MidTy);
- if (!SrcPtrTy || !MidPtrTy)
- return 0;
-
- // If the address spaces are the same, we know they are the same size
- // without size information
- if (SrcPtrTy->getAddressSpace() == MidPtrTy->getAddressSpace())
- return secondOp;
-
- if (!SrcIntPtrTy || !MidIntPtrTy)
- return 0;
-
- if (SrcIntPtrTy->getScalarSizeInBits() ==
- MidIntPtrTy->getScalarSizeInBits())
- return secondOp;
-
- return 0;
- }
- case 12: {
- // inttoptr, bitcast -> inttoptr if bitcast is a ptr to ptr cast
- // and the ptrs are to address spaces of the same size
- PointerType *MidPtrTy = dyn_cast<PointerType>(MidTy);
- PointerType *DstPtrTy = dyn_cast<PointerType>(DstTy);
- if (!MidPtrTy || !DstPtrTy)
- return 0;
-
- if (MidPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
- return firstOp;
-
- if (MidIntPtrTy &&
- DstIntPtrTy &&
- MidIntPtrTy->getScalarSizeInBits() ==
- DstIntPtrTy->getScalarSizeInBits())
- return firstOp;
- return 0;
- }
- case 13: {
// inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
if (!MidIntPtrTy)
return 0;
@@ -2313,8 +2281,65 @@ unsigned CastInst::isEliminableCastPair(
return Instruction::BitCast;
return 0;
}
+ case 12: {
+ // addrspacecast, addrspacecast -> bitcast, if SrcAS == DstAS
+ // addrspacecast, addrspacecast -> addrspacecast, if SrcAS != DstAS
+ if (SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace())
+ return Instruction::AddrSpaceCast;
+ return Instruction::BitCast;
+ }
+ case 13:
+ // FIXME: this state can be merged with (1), but the following assert
+    // is useful to check the correctness of the sequence due to semantic
+ // change of bitcast.
+ assert(
+ SrcTy->isPtrOrPtrVectorTy() &&
+ MidTy->isPtrOrPtrVectorTy() &&
+ DstTy->isPtrOrPtrVectorTy() &&
+ SrcTy->getPointerAddressSpace() != MidTy->getPointerAddressSpace() &&
+ MidTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace() &&
+ "Illegal addrspacecast, bitcast sequence!");
+ // Allowed, use first cast's opcode
+ return firstOp;
+ case 14:
+ // FIXME: this state can be merged with (2), but the following assert
+    // is useful to check the correctness of the sequence due to semantic
+ // change of bitcast.
+ assert(
+ SrcTy->isPtrOrPtrVectorTy() &&
+ MidTy->isPtrOrPtrVectorTy() &&
+ DstTy->isPtrOrPtrVectorTy() &&
+ SrcTy->getPointerAddressSpace() == MidTy->getPointerAddressSpace() &&
+ MidTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace() &&
+ "Illegal bitcast, addrspacecast sequence!");
+ // Allowed, use second cast's opcode
+ return secondOp;
+ case 15:
+ // FIXME: this state can be merged with (1), but the following assert
+    // is useful to check the correctness of the sequence due to semantic
+ // change of bitcast.
+ assert(
+ SrcTy->isIntOrIntVectorTy() &&
+ MidTy->isPtrOrPtrVectorTy() &&
+ DstTy->isPtrOrPtrVectorTy() &&
+ MidTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace() &&
+ "Illegal inttoptr, bitcast sequence!");
+ // Allowed, use first cast's opcode
+ return firstOp;
+ case 16:
+ // FIXME: this state can be merged with (2), but the following assert
+    // is useful to check the correctness of the sequence due to semantic
+ // change of bitcast.
+ assert(
+ SrcTy->isPtrOrPtrVectorTy() &&
+ MidTy->isPtrOrPtrVectorTy() &&
+ DstTy->isIntOrIntVectorTy() &&
+ SrcTy->getPointerAddressSpace() == MidTy->getPointerAddressSpace() &&
+ "Illegal bitcast, ptrtoint sequence!");
+ // Allowed, use second cast's opcode
+ return secondOp;
case 99:
- // cast combination can't happen (error in input). This is for all cases
+ // Cast combination can't happen (error in input). This is for all cases
// where the MidTy is not the same for the two cast instructions.
llvm_unreachable("Invalid Cast Combination");
default:
@@ -2327,19 +2352,20 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
assert(castIsValid(op, S, Ty) && "Invalid cast!");
// Construct and return the appropriate CastInst subclass
switch (op) {
- case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
- case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore);
- case SExt: return new SExtInst (S, Ty, Name, InsertBefore);
- case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore);
- case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore);
- case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore);
- case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
- case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
- case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
- case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
- case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
- case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
- default: llvm_unreachable("Invalid opcode provided");
+ case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore);
+ case SExt: return new SExtInst (S, Ty, Name, InsertBefore);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
+ case AddrSpaceCast: return new AddrSpaceCastInst (S, Ty, Name, InsertBefore);
+ default: llvm_unreachable("Invalid opcode provided");
}
}
@@ -2348,19 +2374,20 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
assert(castIsValid(op, S, Ty) && "Invalid cast!");
// Construct and return the appropriate CastInst subclass
switch (op) {
- case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
- case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd);
- case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd);
- case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd);
- case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd);
- case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd);
- case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd);
- case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd);
- case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd);
- case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
- case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
- case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
- default: llvm_unreachable("Invalid opcode provided");
+ case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd);
+ case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
+ case AddrSpaceCast: return new AddrSpaceCastInst (S, Ty, Name, InsertAtEnd);
+ default: llvm_unreachable("Invalid opcode provided");
}
}
@@ -2425,6 +2452,11 @@ CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
if (Ty->isIntOrIntVectorTy())
return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
+
+ Type *STy = S->getType();
+ if (STy->getPointerAddressSpace() != Ty->getPointerAddressSpace())
+ return Create(Instruction::AddrSpaceCast, S, Ty, Name, InsertAtEnd);
+
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
}
@@ -2442,6 +2474,11 @@ CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
if (Ty->isIntOrIntVectorTy())
return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
+
+ Type *STy = S->getType();
+ if (STy->getPointerAddressSpace() != Ty->getPointerAddressSpace())
+ return Create(Instruction::AddrSpaceCast, S, Ty, Name, InsertBefore);
+
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
}
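Together with the getCastOpcode change below, these hunks make CreatePointerCast pick the new addrspacecast opcode whenever the source and destination address spaces differ, and a plain bitcast otherwise. A self-contained sketch of the resulting usage; the function built here is purely illustrative:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Build a tiny function whose body converts an addrspace(1) i8* argument to
// the generic address space. With this patch, CreatePointerCast emits an
// AddrSpaceCastInst here rather than a BitCastInst.
static Function *makeAddrSpaceCast(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Type *I8PtrAS1 = Type::getInt8PtrTy(Ctx, /*AddressSpace=*/1);
  Type *I8PtrAS0 = Type::getInt8PtrTy(Ctx, /*AddressSpace=*/0);
  FunctionType *FTy = FunctionType::get(I8PtrAS0, I8PtrAS1, /*isVarArg=*/false);
  Function *F =
      Function::Create(FTy, GlobalValue::ExternalLinkage, "to_generic", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  Value *Arg = &*F->arg_begin();
  // Emits: %q = addrspacecast i8 addrspace(1)* %arg to i8*
  CastInst *Q = CastInst::CreatePointerCast(Arg, I8PtrAS0, "q", BB);
  ReturnInst::Create(Ctx, Q, BB);
  return F;
}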
@@ -2687,7 +2724,8 @@ CastInst::getCastOpcode(
return BitCast;
} else if (DestTy->isPointerTy()) {
if (SrcTy->isPointerTy()) {
- // TODO: Address space pointer sizes may not match
+ if (DestTy->getPointerAddressSpace() != SrcTy->getPointerAddressSpace())
+ return AddrSpaceCast;
return BitCast; // ptr -> ptr
} else if (SrcTy->isIntegerTy()) {
return IntToPtr; // int -> ptr
@@ -2782,13 +2820,27 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
case Instruction::BitCast:
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
- if (SrcTy->isPointerTy() != DstTy->isPointerTy())
+ if (SrcTy->isPtrOrPtrVectorTy() != DstTy->isPtrOrPtrVectorTy())
return false;
- // Now we know we're not dealing with a pointer/non-pointer mismatch. In all
- // these cases, the cast is okay if the source and destination bit widths
- // are identical.
- return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
+ // For non pointer cases, the cast is okay if the source and destination bit
+ // widths are identical.
+ if (!SrcTy->isPtrOrPtrVectorTy())
+ return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
+
+ // If both are pointers then the address spaces must match and vector of
+ // pointers must have the same number of elements.
+ return SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace() &&
+ SrcTy->isVectorTy() == DstTy->isVectorTy() &&
+ (!SrcTy->isVectorTy() ||
+       SrcTy->getVectorNumElements() == DstTy->getVectorNumElements());
+
+ case Instruction::AddrSpaceCast:
+ return SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() &&
+ SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace() &&
+ SrcTy->isVectorTy() == DstTy->isVectorTy() &&
+ (!SrcTy->isVectorTy() ||
+          SrcTy->getVectorNumElements() == DstTy->getVectorNumElements());
}
}
@@ -2935,6 +2987,18 @@ BitCastInst::BitCastInst(
assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
}
+AddrSpaceCastInst::AddrSpaceCastInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, AddrSpaceCast, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal AddrSpaceCast");
+}
+
+AddrSpaceCastInst::AddrSpaceCastInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, AddrSpaceCast, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal AddrSpaceCast");
+}
+
//===----------------------------------------------------------------------===//
// CmpInst Classes
//===----------------------------------------------------------------------===//
@@ -3267,7 +3331,6 @@ SwitchInst::SwitchInst(const SwitchInst &SI)
OL[i] = InOL[i];
OL[i+1] = InOL[i+1];
}
- TheSubsets = SI.TheSubsets;
SubclassOptionalData = SI.SubclassOptionalData;
}
@@ -3279,16 +3342,6 @@ SwitchInst::~SwitchInst() {
/// addCase - Add an entry to the switch instruction...
///
void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
- IntegersSubsetToBB Mapping;
-
- // FIXME: Currently we work with ConstantInt based cases.
- // So inititalize IntItem container directly from ConstantInt.
- Mapping.add(IntItem::fromConstantInt(OnVal));
- IntegersSubset CaseRanges = Mapping.getCase();
- addCase(CaseRanges, Dest);
-}
-
-void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) {
unsigned NewCaseIdx = getNumCases();
unsigned OpNo = NumOperands;
if (OpNo+2 > ReservedSpace)
@@ -3296,17 +3349,14 @@ void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) {
// Initialize some new operands.
assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
NumOperands = OpNo+2;
-
- SubsetsIt TheSubsetsIt = TheSubsets.insert(TheSubsets.end(), OnVal);
-
- CaseIt Case(this, NewCaseIdx, TheSubsetsIt);
- Case.updateCaseValueOperand(OnVal);
+ CaseIt Case(this, NewCaseIdx);
+ Case.setValue(OnVal);
Case.setSuccessor(Dest);
}
/// removeCase - This method removes the specified case and its successor
/// from the switch instruction.
-void SwitchInst::removeCase(CaseIt& i) {
+void SwitchInst::removeCase(CaseIt i) {
unsigned idx = i.getCaseIndex();
assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
@@ -3323,16 +3373,6 @@ void SwitchInst::removeCase(CaseIt& i) {
// Nuke the last value.
OL[NumOps-2].set(0);
OL[NumOps-2+1].set(0);
-
- // Do the same with TheCases collection:
- if (i.SubsetIt != --TheSubsets.end()) {
- *i.SubsetIt = TheSubsets.back();
- TheSubsets.pop_back();
- } else {
- TheSubsets.pop_back();
- i.SubsetIt = TheSubsets.end();
- }
-
NumOperands = NumOps-2;
}
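With the IntegersSubset machinery gone, addCase takes a plain ConstantInt again and removeCase takes its CaseIt by value. A short usage sketch under those assumptions; the helper name and the i32 case value are made up:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Cond is expected to be an i32 value; one case (42) jumps to CaseBB,
// everything else falls through to Default.
static SwitchInst *buildSwitch(Value *Cond, BasicBlock *Default,
                               BasicBlock *CaseBB, BasicBlock *InsertAtEnd) {
  LLVMContext &Ctx = InsertAtEnd->getContext();
  SwitchInst *SI = SwitchInst::Create(Cond, Default, /*NumCases=*/1, InsertAtEnd);
  SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 42), CaseBB); // case i32 42
  return SI;
}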
@@ -3577,6 +3617,10 @@ BitCastInst *BitCastInst::clone_impl() const {
return new BitCastInst(getOperand(0), getType());
}
+AddrSpaceCastInst *AddrSpaceCastInst::clone_impl() const {
+ return new AddrSpaceCastInst(getOperand(0), getType());
+}
+
CallInst *CallInst::clone_impl() const {
return new(getNumOperands()) CallInst(*this);
}
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 0c659b8..407b985 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -355,6 +355,11 @@ public:
typedef DenseMap<const Function*, unsigned> IntrinsicIDCacheTy;
IntrinsicIDCacheTy IntrinsicIDCache;
+ /// \brief Mapping from a function to its prefix data, which is stored as the
+ /// operand of an unparented ReturnInst so that the prefix data has a Use.
+ typedef DenseMap<const Function *, ReturnInst *> PrefixDataMapTy;
+ PrefixDataMapTy PrefixDataMap;
+
int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
new file mode 100644
index 0000000..a431d82
--- /dev/null
+++ b/lib/IR/LegacyPassManager.cpp
@@ -0,0 +1,1920 @@
+//===- LegacyPassManager.cpp - LLVM Pass Infrastructure Implementation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the legacy LLVM Pass Manager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+using namespace llvm::legacy;
+
+// See PassManagers.h for Pass Manager infrastructure overview.
+
+//===----------------------------------------------------------------------===//
+// Pass debugging information. Often it is useful to find out what pass is
+// running when a crash occurs in a utility. When this library is compiled with
+// debugging on, a command line option (--debug-pass) is enabled that causes the
+// pass name to be printed before it executes.
+//
+
+namespace {
+// Different debug levels that can be enabled...
+enum PassDebugLevel {
+ Disabled, Arguments, Structure, Executions, Details
+};
+}
+
+static cl::opt<enum PassDebugLevel>
+PassDebugging("debug-pass", cl::Hidden,
+ cl::desc("Print PassManager debugging information"),
+ cl::values(
+ clEnumVal(Disabled , "disable debug output"),
+ clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
+ clEnumVal(Structure , "print pass structure before run()"),
+ clEnumVal(Executions, "print pass name before it is executed"),
+ clEnumVal(Details , "print pass details when it is executed"),
+ clEnumValEnd));
+
+namespace {
+typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
+PassOptionList;
+}
+
+// Print IR out before/after specified passes.
+static PassOptionList
+PrintBefore("print-before",
+ llvm::cl::desc("Print IR before specified passes"),
+ cl::Hidden);
+
+static PassOptionList
+PrintAfter("print-after",
+ llvm::cl::desc("Print IR after specified passes"),
+ cl::Hidden);
+
+static cl::opt<bool>
+PrintBeforeAll("print-before-all",
+ llvm::cl::desc("Print IR before each pass"),
+ cl::init(false));
+static cl::opt<bool>
+PrintAfterAll("print-after-all",
+ llvm::cl::desc("Print IR after each pass"),
+ cl::init(false));
+
+/// This is a helper to determine whether to print IR before or
+/// after a pass.
+
+static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
+ PassOptionList &PassesToPrint) {
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf)
+ if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// This is a utility to check whether a pass should have IR dumped
+/// before it.
+static bool ShouldPrintBeforePass(const PassInfo *PI) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
+}
+
+/// This is a utility to check whether a pass should have IR dumped
+/// after it.
+static bool ShouldPrintAfterPass(const PassInfo *PI) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
+}
+
+/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
+/// or higher is specified.
+bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
+ return PassDebugging >= Executions;
+}
+
+
+
+
+void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
+ if (V == 0 && M == 0)
+ OS << "Releasing pass '";
+ else
+ OS << "Running pass '";
+
+ OS << P->getPassName() << "'";
+
+ if (M) {
+ OS << " on module '" << M->getModuleIdentifier() << "'.\n";
+ return;
+ }
+ if (V == 0) {
+ OS << '\n';
+ return;
+ }
+
+ OS << " on ";
+ if (isa<Function>(V))
+ OS << "function";
+ else if (isa<BasicBlock>(V))
+ OS << "basic block";
+ else
+ OS << "value";
+
+ OS << " '";
+ WriteAsOperand(OS, V, /*PrintTy=*/false, M);
+ OS << "'\n";
+}
+
+
+namespace {
+//===----------------------------------------------------------------------===//
+// BBPassManager
+//
+/// BBPassManager manages BasicBlockPasses. It batches all the
+/// passes together and sequences them to process one basic block before
+/// processing the next basic block.
+class BBPassManager : public PMDataManager, public FunctionPass {
+
+public:
+ static char ID;
+ explicit BBPassManager()
+ : PMDataManager(), FunctionPass(ID) {}
+
+ /// Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the function, and if so, return true.
+ bool runOnFunction(Function &F);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M);
+ bool doInitialization(Function &F);
+ bool doFinalization(Module &M);
+ bool doFinalization(Function &F);
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ virtual const char *getPassName() const {
+ return "BasicBlock Pass Manager";
+ }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ BP->dumpPassStructure(Offset + 1);
+ dumpLastUses(BP, Offset+1);
+ }
+ }
+
+ BasicBlockPass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
+ return BP;
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+ }
+};
+
+char BBPassManager::ID = 0;
+} // End anonymous namespace
+
+namespace llvm {
+namespace legacy {
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl
+//
+/// FunctionPassManagerImpl manages FPPassManagers
+class FunctionPassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+ virtual void anchor();
+private:
+ bool wasRun;
+public:
+ static char ID;
+ explicit FunctionPassManagerImpl() :
+ Pass(PT_PassManager, ID), PMDataManager(),
+ PMTopLevelManager(new FPPassManager()), wasRun(false) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a function printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+ }
+
+  // Prepare for running an on-the-fly pass, freeing memory if needed
+ // from a previous run.
+ void releaseMemoryOnTheFly();
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Function &F);
+
+ /// doInitialization - Run all of the initializers for the function passes.
+ ///
+ bool doInitialization(Module &M);
+
+ /// doFinalization - Run all of the finalizers for the function passes.
+ ///
+ bool doFinalization(Module &M);
+
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_FunctionPassManager;
+ }
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ FPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
+ return FP;
+ }
+};
+
+void FunctionPassManagerImpl::anchor() {}
+
+char FunctionPassManagerImpl::ID = 0;
+} // End of legacy namespace
+} // End of llvm namespace
+
+namespace {
+//===----------------------------------------------------------------------===//
+// MPPassManager
+//
+/// MPPassManager manages ModulePasses and function pass managers.
+/// It batches all Module passes and function pass managers together and
+/// sequences them to process one module.
+class MPPassManager : public Pass, public PMDataManager {
+public:
+ static char ID;
+ explicit MPPassManager() :
+ Pass(PT_PassManager, ID), PMDataManager() { }
+
+ // Delete on the fly managers.
+ virtual ~MPPassManager() {
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ delete FPP;
+ }
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ /// Add RequiredPass into list of lower level passes required by pass P.
+ /// RequiredPass is run on the fly by Pass Manager when P requests it
+ /// through getAnalysis interface.
+ virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
+
+ /// Return function pass corresponding to PassInfo PI, that is
+ /// required by module pass MP. Instantiate analysis pass, by using
+ /// its runOnFunction() for function F.
+ virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
+
+ virtual const char *getPassName() const {
+ return "Module Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ MP->dumpPassStructure(Offset + 1);
+ std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I =
+ OnTheFlyManagers.find(MP);
+ if (I != OnTheFlyManagers.end())
+ I->second->dumpPassStructure(Offset + 2);
+ dumpLastUses(MP, Offset+1);
+ }
+ }
+
+ ModulePass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<ModulePass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_ModulePassManager;
+ }
+
+ private:
+ /// Collection of on the fly FPPassManagers. These managers manage
+ /// function passes that are required by module passes.
+ std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers;
+};
+
+char MPPassManager::ID = 0;
+} // End anonymous namespace
+
+namespace llvm {
+namespace legacy {
+//===----------------------------------------------------------------------===//
+// PassManagerImpl
+//
+
+/// PassManagerImpl manages MPPassManagers
+class PassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+ virtual void anchor();
+
+public:
+ static char ID;
+ explicit PassManagerImpl() :
+ Pass(PT_PassManager, ID), PMDataManager(),
+ PMTopLevelManager(new MPPassManager()) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Module &M);
+
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_ModulePassManager;
+ }
+
+ MPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]);
+ return MP;
+ }
+};
+
+void PassManagerImpl::anchor() {}
+
+char PassManagerImpl::ID = 0;
+} // End of legacy namespace
+} // End of llvm namespace
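PassManagerImpl is the engine behind the public legacy::PassManager declared in llvm/IR/LegacyPassManager.h. A minimal usage sketch; the particular passes are only examples, and, as the add() comment above notes, the manager takes ownership, so passes must be heap-allocated with new:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

static bool runCleanup(Module &M) {
  legacy::PassManager PM;
  PM.add(createGlobalDCEPass());          // ownership transfers to the manager
  PM.add(createCFGSimplificationPass());
  return PM.run(M);                       // true if any pass modified the module
}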
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+/// TimingInfo Class - This class is used to calculate information about the
+/// amount of time each pass takes to execute. This only happens when
+/// -time-passes is enabled on the command line.
+///
+
+static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
+
+class TimingInfo {
+ DenseMap<Pass*, Timer*> TimingData;
+ TimerGroup TG;
+public:
+ // Use 'create' member to get this.
+ TimingInfo() : TG("... Pass execution timing report ...") {}
+
+  // TimingDtor - Print out the collected timing information on destruction.
+ ~TimingInfo() {
+ // Delete all of the timers, which accumulate their info into the
+ // TimerGroup.
+ for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
+ E = TimingData.end(); I != E; ++I)
+ delete I->second;
+ // TimerGroup is deleted next, printing the report.
+ }
+
+ // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
+ // to a non null value (if the -time-passes option is enabled) or it leaves it
+ // null. It may be called multiple times.
+ static void createTheTimeInfo();
+
+ /// getPassTimer - Return the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *P) {
+ if (P->getAsPMDataManager())
+ return 0;
+
+ sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
+ Timer *&T = TimingData[P];
+ if (T == 0)
+ T = new Timer(P->getPassName(), TG);
+ return T;
+ }
+};
+
+} // End of anon namespace
+
+static TimingInfo *TheTimeInfo;
+
+//===----------------------------------------------------------------------===//
+// PMTopLevelManager implementation
+
+/// Initialize top level manager. Create first pass manager.
+PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
+ PMDM->setTopLevelManager(this);
+ addPassManager(PMDM);
+ activeStack.push(PMDM);
+}
+
+/// Set pass P as the last user of the given analysis passes.
+void
+PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
+ unsigned PDepth = 0;
+ if (P->getResolver())
+ PDepth = P->getResolver()->getPMDataManager().getDepth();
+
+ for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
+ E = AnalysisPasses.end(); I != E; ++I) {
+ Pass *AP = *I;
+ LastUser[AP] = P;
+
+ if (P == AP)
+ continue;
+
+ // Update the last users of passes that are required transitive by AP.
+ AnalysisUsage *AnUsage = findAnalysisUsage(AP);
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 12> LastPMUses;
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ assert(AnalysisPass && "Expected analysis pass to exist.");
+ AnalysisResolver *AR = AnalysisPass->getResolver();
+ assert(AR && "Expected analysis resolver to exist.");
+ unsigned APDepth = AR->getPMDataManager().getDepth();
+
+ if (PDepth == APDepth)
+ LastUses.push_back(AnalysisPass);
+ else if (PDepth > APDepth)
+ LastPMUses.push_back(AnalysisPass);
+ }
+
+ setLastUser(LastUses, P);
+
+ // If this pass has a corresponding pass manager, push higher level
+ // analysis to this pass manager.
+ if (P->getResolver())
+ setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
+
+
+ // If AP is the last user of other passes then make P last user of
+ // such passes.
+ for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
+ LUE = LastUser.end(); LUI != LUE; ++LUI) {
+ if (LUI->second == AP)
+ // DenseMap iterator is not invalidated here because
+ // this is just updating existing entries.
+ LastUser[LUI->first] = P;
+ }
+ }
+}
+
+/// Collect passes whose last user is P
+void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
+ Pass *P) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
+ InversedLastUser.find(P);
+ if (DMI == InversedLastUser.end())
+ return;
+
+ SmallPtrSet<Pass *, 8> &LU = DMI->second;
+ for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
+ E = LU.end(); I != E; ++I) {
+ LastUses.push_back(*I);
+ }
+
+}
+
+AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
+ AnalysisUsage *AnUsage = NULL;
+ DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
+ if (DMI != AnUsageMap.end())
+ AnUsage = DMI->second;
+ else {
+ AnUsage = new AnalysisUsage();
+ P->getAnalysisUsage(*AnUsage);
+ AnUsageMap[P] = AnUsage;
+ }
+ return AnUsage;
+}
+
+/// Schedule pass P for execution. Make sure that passes required by
+/// P are run before P is run. Update analysis info maintained by
+/// the manager. Remove dead passes. This is a recursive function.
+void PMTopLevelManager::schedulePass(Pass *P) {
+
+  // TODO: Allocate a function manager for this pass, otherwise the required
+  // set may be inserted into the previous function manager.
+
+ // Give pass a chance to prepare the stage.
+ P->preparePassManager(activeStack);
+
+ // If P is an analysis pass and it is available then do not
+ // generate the analysis again. Stale analysis info should not be
+ // available at this point.
+ const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+ if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
+ delete P;
+ return;
+ }
+
+ AnalysisUsage *AnUsage = findAnalysisUsage(P);
+
+ bool checkAnalysis = true;
+ while (checkAnalysis) {
+ checkAnalysis = false;
+
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
+ E = RequiredSet.end(); I != E; ++I) {
+
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ if (!AnalysisPass) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+
+ if (PI == NULL) {
+ // Pass P is not in the global PassRegistry
+ dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n";
+ dbgs() << "Verify if there is a pass dependency cycle." << "\n";
+ dbgs() << "Required Passes:" << "\n";
+ for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(),
+ E = RequiredSet.end(); I2 != E && I2 != I; ++I2) {
+ Pass *AnalysisPass2 = findAnalysisPass(*I2);
+ if (AnalysisPass2) {
+ dbgs() << "\t" << AnalysisPass2->getPassName() << "\n";
+ } else {
+ dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n";
+ dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n";
+ dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n";
+ }
+ }
+ }
+
+ assert(PI && "Expected required passes to be initialized");
+ AnalysisPass = PI->createPass();
+ if (P->getPotentialPassManagerType () ==
+ AnalysisPass->getPotentialPassManagerType())
+ // Schedule analysis pass that is managed by the same pass manager.
+ schedulePass(AnalysisPass);
+ else if (P->getPotentialPassManagerType () >
+ AnalysisPass->getPotentialPassManagerType()) {
+ // Schedule analysis pass that is managed by a new manager.
+ schedulePass(AnalysisPass);
+ // Recheck analysis passes to ensure that required analyses that
+ // are already checked are still available.
+ checkAnalysis = true;
+ } else
+        // Do not schedule this analysis. Lower level analysis
+ // passes are run on the fly.
+ delete AnalysisPass;
+ }
+ }
+ }
+
+ // Now all required passes are available.
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+ // P is a immutable pass and it will be managed by this
+ // top level manager. Set up analysis resolver to connect them.
+ PMDataManager *DM = getAsPMDataManager();
+ AnalysisResolver *AR = new AnalysisResolver(*DM);
+ P->setResolver(AR);
+ DM->initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ DM->recordAvailableAnalysis(IP);
+ return;
+ }
+
+ if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
+
+ // Add the requested pass to the best available pass manager.
+ P->assignPassManager(activeStack, getTopLevelPassManagerType());
+
+ if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
+}
+
+/// Find the pass that implements Analysis AID. Search immutable
+/// passes and all pass managers. If desired pass is not found
+/// then return NULL.
+Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
+
+ // Check pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check other pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(),
+ E = IndirectPassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check the immutable passes. Iterate in reverse order so that we find
+ // the most recently registered passes first.
+ for (SmallVectorImpl<ImmutablePass *>::reverse_iterator I =
+ ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
+ AnalysisID PI = (*I)->getPassID();
+ if (PI == AID)
+ return *I;
+
+    // If the pass is not found, check the interfaces the immutable pass implements.
+ const PassInfo *PassInf =
+ PassRegistry::getPassRegistry()->getPassInfo(PI);
+ assert(PassInf && "Expected all immutable passes to be initialized");
+ const std::vector<const PassInfo*> &ImmPI =
+ PassInf->getInterfacesImplemented();
+ for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+ EE = ImmPI.end(); II != EE; ++II) {
+ if ((*II)->getTypeInfo() == AID)
+ return *I;
+ }
+ }
+
+ return 0;
+}
+
+// Print passes managed by this top level manager.
+void PMTopLevelManager::dumpPasses() const {
+
+ if (PassDebugging < Structure)
+ return;
+
+ // Print out the immutable passes
+ for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
+ ImmutablePasses[i]->dumpPassStructure(0);
+ }
+
+ // Every class that derives from PMDataManager also derives from Pass
+ // (sometimes indirectly), but there's no inheritance relationship
+ // between PMDataManager and Pass, so we have to getAsPass to get
+ // from a PMDataManager* to a Pass*.
+ for (SmallVectorImpl<PMDataManager *>::const_iterator I =
+ PassManagers.begin(), E = PassManagers.end(); I != E; ++I)
+ (*I)->getAsPass()->dumpPassStructure(1);
+}
+
+void PMTopLevelManager::dumpArguments() const {
+
+ if (PassDebugging < Arguments)
+ return;
+
+ dbgs() << "Pass Arguments: ";
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I =
+ ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
+ assert(PI && "Expected all immutable passes to be initialized");
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+ for (SmallVectorImpl<PMDataManager *>::const_iterator I =
+ PassManagers.begin(), E = PassManagers.end(); I != E; ++I)
+ (*I)->dumpPassArguments();
+ dbgs() << "\n";
+}
+
+void PMTopLevelManager::initializeAllAnalysisInfo() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ // Initialize the indirect pass managers.
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+ I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
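+ // Rebuild the InversedLastUser map from LastUser so that collectLastUses()
+ // can find, for a given pass, all passes for which it is the last user.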
+ for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
+ DME = LastUser.end(); DMI != DME; ++DMI) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
+ InversedLastUser.find(DMI->second);
+ if (InvDMI != InversedLastUser.end()) {
+ SmallPtrSet<Pass *, 8> &L = InvDMI->second;
+ L.insert(DMI->first);
+ } else {
+ SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
+ InversedLastUser[DMI->second] = L;
+ }
+ }
+}
+
+/// Destructor
+PMTopLevelManager::~PMTopLevelManager() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ delete *I;
+
+ for (SmallVectorImpl<ImmutablePass *>::iterator
+ I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ delete *I;
+
+ for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
+ DME = AnUsageMap.end(); DMI != DME; ++DMI)
+ delete DMI->second;
+}
+
+//===----------------------------------------------------------------------===//
+// PMDataManager implementation
+
+/// Augment AvailableAnalysis by adding analysis made available by pass P.
+void PMDataManager::recordAvailableAnalysis(Pass *P) {
+ AnalysisID PI = P->getPassID();
+
+ AvailableAnalysis[PI] = P;
+
+ assert(!AvailableAnalysis.empty());
+
+ // This pass is the current implementation of all of the interfaces it
+ // implements as well.
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
+ if (PInf == 0) return;
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i)
+ AvailableAnalysis[II[i]->getTypeInfo()] = P;
+}
+
+// Return true if P preserves high level analysis used by other
+// passes managed by this manager
+bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return true;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
+ E = HigherLevelAnalysis.end(); I != E; ++I) {
+ Pass *P1 = *I;
+ if (P1->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(),
+ P1->getPassID()) ==
+ PreservedSet.end())
+ return false;
+ }
+
+ return true;
+}
+
+/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
+void PMDataManager::verifyPreservedAnalysis(Pass *P) {
+ // Don't do this unless assertions are enabled.
+#ifdef NDEBUG
+ return;
+#endif
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+
+ // Verify preserved analysis
+ for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
+ E = PreservedSet.end(); I != E; ++I) {
+ AnalysisID AID = *I;
+ if (Pass *AP = findAnalysisPass(AID, true)) {
+ TimeRegion PassTimer(getPassTimer(AP));
+ AP->verifyAnalysis();
+ }
+ }
+}
+
+/// Remove analyses that are not preserved by pass P.
+void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (DenseMap<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
+ E = AvailableAnalysis.end(); I != E; ) {
+ DenseMap<AnalysisID, Pass*>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ AvailableAnalysis.erase(Info);
+ }
+ }
+
+ // Also check inherited analyses. If P does not preserve an analysis
+ // provided by a parent manager, remove it here as well.
+ for (unsigned Index = 0; Index < PMT_Last; ++Index) {
+
+ if (!InheritedAnalysis[Index])
+ continue;
+
+ for (DenseMap<AnalysisID, Pass*>::iterator
+ I = InheritedAnalysis[Index]->begin(),
+ E = InheritedAnalysis[Index]->end(); I != E; ) {
+ DenseMap<AnalysisID, Pass *>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ InheritedAnalysis[Index]->erase(Info);
+ }
+ }
+ }
+}
+
+/// Remove analysis passes that are not used any longer
+void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+
+ SmallVector<Pass *, 12> DeadPasses;
+
+ // If this is an on-the-fly manager, then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(DeadPasses, P);
+
+ if (PassDebugging >= Details && !DeadPasses.empty()) {
+ dbgs() << " -*- '" << P->getPassName();
+ dbgs() << "' is the last user of following pass instances.";
+ dbgs() << " Free these instances\n";
+ }
+
+ for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
+ E = DeadPasses.end(); I != E; ++I)
+ freePass(*I, Msg, DBG_STR);
+}
+
+void PMDataManager::freePass(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+ dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
+
+ {
+ // If the pass crashes releasing memory, remember this.
+ PassManagerPrettyStackEntry X(P);
+ TimeRegion PassTimer(getPassTimer(P));
+
+ P->releaseMemory();
+ }
+
+ AnalysisID PI = P->getPassID();
+ if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
+ // Remove the pass itself (if it is not already removed).
+ AvailableAnalysis.erase(PI);
+
+ // Remove all interfaces this pass implements, for which it is also
+ // listed as the available implementation.
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i) {
+ DenseMap<AnalysisID, Pass*>::iterator Pos =
+ AvailableAnalysis.find(II[i]->getTypeInfo());
+ if (Pos != AvailableAnalysis.end() && Pos->second == P)
+ AvailableAnalysis.erase(Pos);
+ }
+ }
+}
+
+/// Add pass P into the PassVector. Update
+/// AvailableAnalysis appropriately if ProcessAnalysis is true.
+void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
+ // This manager is going to manage pass P. Set up analysis resolver
+ // to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+
+ // If a FunctionPass F is the last user of ModulePass info M,
+ // then F's manager, not F, records itself as the last user of M.
+ SmallVector<Pass *, 12> TransferLastUses;
+
+ if (!ProcessAnalysis) {
+ // Add pass
+ PassVector.push_back(P);
+ return;
+ }
+
+ // At the moment, this pass is the last user of all required passes.
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 8> RequiredPasses;
+ SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
+
+ unsigned PDepth = this->getDepth();
+
+ collectRequiredAnalysis(RequiredPasses,
+ ReqAnalysisNotAvailable, P);
+ for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
+ E = RequiredPasses.end(); I != E; ++I) {
+ Pass *PRequired = *I;
+ unsigned RDepth = 0;
+
+ assert(PRequired->getResolver() && "Analysis Resolver is not set");
+ PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
+ RDepth = DM.getDepth();
+
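+ // Compare this manager's depth with the depth of the manager that owns the
+ // required analysis to decide where the last use should be recorded.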
+ if (PDepth == RDepth)
+ LastUses.push_back(PRequired);
+ else if (PDepth > RDepth) {
+ // Let the parent claim responsibility for the last use.
+ TransferLastUses.push_back(PRequired);
+ // Keep track of higher level analysis used by this manager.
+ HigherLevelAnalysis.push_back(PRequired);
+ } else
+ llvm_unreachable("Unable to accommodate Required Pass");
+ }
+
+ // Set P as P's last user until someone starts using P.
+ // However, if P is a Pass Manager then it does not need
+ // to record its last user.
+ if (P->getAsPMDataManager() == 0)
+ LastUses.push_back(P);
+ TPM->setLastUser(LastUses, P);
+
+ if (!TransferLastUses.empty()) {
+ Pass *My_PM = getAsPass();
+ TPM->setLastUser(TransferLastUses, My_PM);
+ TransferLastUses.clear();
+ }
+
+ // Now, take care of required analyses that are not available.
+ for (SmallVectorImpl<AnalysisID>::iterator
+ I = ReqAnalysisNotAvailable.begin(),
+ E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ Pass *AnalysisPass = PI->createPass();
+ this->addLowerLevelRequiredPass(P, AnalysisPass);
+ }
+
+ // Take a note of analysis required and made available by this pass.
+ // Remove the analysis not preserved by this pass
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+
+ // Add pass
+ PassVector.push_back(P);
+}
+
+
+/// Populate RP with analysis passes that are required by
+/// pass P and are available. Populate RP_NotAvail with analysis
+/// passes that are required by pass P but are not available.
+void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
+ SmallVectorImpl<AnalysisID> &RP_NotAvail,
+ Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator
+ I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+}
+
+// All Required analyses should be available to the pass as it runs! Here
+// we fill in the AnalysisImpls member of the pass so that it can
+// successfully use the getAnalysis() method to retrieve the
+// implementations it needs.
+//
+void PMDataManager::initializeAnalysisImpl(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+
+ for (AnalysisUsage::VectorType::const_iterator
+ I = AnUsage->getRequiredSet().begin(),
+ E = AnUsage->getRequiredSet().end(); I != E; ++I) {
+ Pass *Impl = findAnalysisPass(*I, true);
+ if (Impl == 0)
+ // This may be an analysis pass that is initialized on the fly.
+ // If that is not the case, it will raise an assert when it is used.
+ continue;
+ AnalysisResolver *AR = P->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->addAnalysisImplsPair(*I, Impl);
+ }
+}
+
+/// Find the pass that implements Analysis AID. If the desired pass is not
+/// found, return NULL.
+Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
+
+ // Check whether the AvailableAnalysis map has an entry for AID.
+ DenseMap<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID);
+
+ if (I != AvailableAnalysis.end())
+ return I->second;
+
+ // Search parent managers through the top level manager.
+ if (SearchParent)
+ return TPM->findAnalysisPass(AID);
+
+ return NULL;
+}
+
+// Print list of passes that are last used by P.
+void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
+
+ SmallVector<Pass *, 12> LUses;
+
+ // If this is an on-the-fly manager, then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(LUses, P);
+
+ for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
+ E = LUses.end(); I != E; ++I) {
+ llvm::dbgs() << "--" << std::string(Offset*2, ' ');
+ (*I)->dumpPassStructure(0);
+ }
+}
+
+void PMDataManager::dumpPassArguments() const {
+ for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I) {
+ if (PMDataManager *PMD = (*I)->getAsPMDataManager())
+ PMD->dumpPassArguments();
+ else
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+}
+
+void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
+ enum PassDebuggingString S2,
+ StringRef Msg) {
+ if (PassDebugging < Executions)
+ return;
+ dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
+ switch (S1) {
+ case EXECUTION_MSG:
+ dbgs() << "Executing Pass '" << P->getPassName();
+ break;
+ case MODIFICATION_MSG:
+ dbgs() << "Made Modification '" << P->getPassName();
+ break;
+ case FREEING_MSG:
+ dbgs() << " Freeing Pass '" << P->getPassName();
+ break;
+ default:
+ break;
+ }
+ switch (S2) {
+ case ON_BASICBLOCK_MSG:
+ dbgs() << "' on BasicBlock '" << Msg << "'...\n";
+ break;
+ case ON_FUNCTION_MSG:
+ dbgs() << "' on Function '" << Msg << "'...\n";
+ break;
+ case ON_MODULE_MSG:
+ dbgs() << "' on Module '" << Msg << "'...\n";
+ break;
+ case ON_REGION_MSG:
+ dbgs() << "' on Region '" << Msg << "'...\n";
+ break;
+ case ON_LOOP_MSG:
+ dbgs() << "' on Loop '" << Msg << "'...\n";
+ break;
+ case ON_CG_MSG:
+ dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n";
+ break;
+ default:
+ break;
+ }
+}
+
+void PMDataManager::dumpRequiredSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
+}
+
+void PMDataManager::dumpPreservedSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
+}
+
+void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
+ const AnalysisUsage::VectorType &Set) const {
+ assert(PassDebugging >= Details);
+ if (Set.empty())
+ return;
+ dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
+ for (unsigned i = 0; i != Set.size(); ++i) {
+ if (i) dbgs() << ',';
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+ if (!PInf) {
+ // Some preserved passes, such as AliasAnalysis, may not be initialized by
+ // all drivers.
+ dbgs() << " Uninitialized Pass";
+ continue;
+ }
+ dbgs() << ' ' << PInf->getPassName();
+ }
+ dbgs() << '\n';
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+/// This should be handled by specific pass manager.
+void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ if (TPM) {
+ TPM->dumpArguments();
+ TPM->dumpPasses();
+ }
+
+ // A module-level pass may require function-level analysis info
+ // (e.g. dominator info). The pass manager uses an on-the-fly function pass
+ // manager to provide this on demand. In pass manager terminology, the
+ // module-level pass is then requiring lower-level analysis info managed by
+ // a lower-level pass manager.
+
+ // When the pass manager is not able to order the required analysis info,
+ // it checks whether any lower-level manager will be able to provide this
+ // analysis info on demand.
+#ifndef NDEBUG
+ dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
+ dbgs() << "' required by '" << P->getPassName() << "'\n";
+#endif
+ llvm_unreachable("Unable to schedule pass");
+}
+
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
+ llvm_unreachable("Unable to find on the fly pass");
+}
+
+// Destructor
+PMDataManager::~PMDataManager() {
+ for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I)
+ delete *I;
+}
+
+//===----------------------------------------------------------------------===//
+// NOTE: Is this the right place to define this method?
+// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
+Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
+ return PM.findAnalysisPass(ID, dir);
+}
+
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
+ Function &F) {
+ return PM.getOnTheFlyPass(P, AnalysisPI, F);
+}
+
+//===----------------------------------------------------------------------===//
+// BBPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking the
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool BBPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = doInitialization(F);
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
+ dumpRequiredSet(BP);
+
+ initializeAnalysisImpl(BP);
+
+ {
+ // If the pass crashes, remember this.
+ PassManagerPrettyStackEntry X(BP, *I);
+ TimeRegion PassTimer(getPassTimer(BP));
+
+ LocalChanged |= BP->runOnBasicBlock(*I);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
+ I->getName());
+ dumpPreservedSet(BP);
+
+ verifyPreservedAnalysis(BP);
+ removeNotPreservedAnalysis(BP);
+ recordAvailableAnalysis(BP);
+ removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
+ }
+
+ return doFinalization(F) || Changed;
+}
+
+// Implement doInitialization and doFinalization
+bool BBPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doInitialization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doInitialization(F);
+ }
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doFinalization(F);
+ }
+
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManager implementation
+
+/// Create new Function pass manager
+FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
+ FPM = new FunctionPassManagerImpl();
+ // FPM is the top level manager.
+ FPM->setTopLevelManager(FPM);
+
+ AnalysisResolver *AR = new AnalysisResolver(*FPM);
+ FPM->setResolver(AR);
+}
+
+FunctionPassManager::~FunctionPassManager() {
+ delete FPM;
+}
+
+/// add - Add a pass to the queue of passes to run. This passes
+/// ownership of the Pass to the PassManager. When the
+/// PassManager_X is destroyed, the pass will be destroyed as well, so
+/// there is no need to delete the pass. (TODO delete passes.)
+/// This implies that all passes MUST be allocated with 'new'.
+void FunctionPassManager::add(Pass *P) {
+ FPM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep
+/// track of whether any of the passes modifies the function, and if
+/// so, return true.
+///
+bool FunctionPassManager::run(Function &F) {
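+ // Make sure the function body has been read in (materialized) before
+ // running any passes over it.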
+ if (F.isMaterializable()) {
+ std::string errstr;
+ if (F.Materialize(&errstr))
+ report_fatal_error("Error reading bitcode file: " + Twine(errstr));
+ }
+ return FPM->run(F);
+}
+
+
+/// doInitialization - Run all of the initializers for the function passes.
+///
+bool FunctionPassManager::doInitialization() {
+ return FPM->doInitialization(*M);
+}
+
+/// doFinalization - Run all of the finalizers for the function passes.
+///
+bool FunctionPassManager::doFinalization() {
+ return FPM->doFinalization(*M);
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl implementation
+//
+bool FunctionPassManagerImpl::doInitialization(Module &M) {
+ bool Changed = false;
+
+ dumpArguments();
+ dumpPasses();
+
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doInitialization(M);
+ }
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FunctionPassManagerImpl::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index)
+ Changed |= getContainedManager(Index)->doFinalization(M);
+
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+/// cleanup - After running all passes, clean up pass manager cache.
+void FPPassManager::cleanup() {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ AnalysisResolver *AR = FP->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->clearAnalysisImpls();
+ }
+}
+
+void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
+ if (!wasRun)
+ return;
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
+ FPPassManager *FPPM = getContainedManager(Index);
+ for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) {
+ FPPM->getContainedPass(Index)->releaseMemory();
+ }
+ }
+ wasRun = false;
+}
+
+// Execute all the passes managed by this top level manager.
+// Return true if the function is modified by any pass.
+bool FunctionPassManagerImpl::run(Function &F) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnFunction(F);
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ getContainedManager(Index)->cleanup();
+
+ wasRun = true;
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// FPPassManager implementation
+
+char FPPassManager::ID = 0;
+/// Print passes managed by this manager
+void FPPassManager::dumpPassStructure(unsigned Offset) {
+ dbgs().indent(Offset*2) << "FunctionPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ FP->dumpPassStructure(Offset + 1);
+ dumpLastUses(FP, Offset+1);
+ }
+}
+
+
+/// Execute all of the passes scheduled for execution by invoking the
+/// runOnFunction method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool FPPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpRequiredSet(FP);
+
+ initializeAnalysisImpl(FP);
+
+ {
+ PassManagerPrettyStackEntry X(FP, F);
+ TimeRegion PassTimer(getPassTimer(FP));
+
+ LocalChanged |= FP->runOnFunction(F);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpPreservedSet(FP);
+
+ verifyPreservedAnalysis(FP);
+ removeNotPreservedAnalysis(FP);
+ recordAvailableAnalysis(FP);
+ removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
+ }
+ return Changed;
+}
+
+bool FPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ Changed |= runOnFunction(*I);
+
+ return Changed;
+}
+
+bool FPPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FPPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MPPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking the
+/// runOnModule method. Keep track of whether any of the passes modifies
+/// the module, and if so, return true.
+bool
+MPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Initialize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ Changed |= FPP->doInitialization(M);
+ }
+
+ // Initialize module passes
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
+ dumpRequiredSet(MP);
+
+ initializeAnalysisImpl(MP);
+
+ {
+ PassManagerPrettyStackEntry X(MP, M);
+ TimeRegion PassTimer(getPassTimer(MP));
+
+ LocalChanged |= MP->runOnModule(M);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
+ M.getModuleIdentifier());
+ dumpPreservedSet(MP);
+
+ verifyPreservedAnalysis(MP);
+ removeNotPreservedAnalysis(MP);
+ recordAvailableAnalysis(MP);
+ removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
+ }
+
+ // Finalize module passes
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ // Finalize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ // We don't know when an on-the-fly pass will be run for the last time,
+ // so we need to releaseMemory / finalize here.
+ FPP->releaseMemoryOnTheFly();
+ Changed |= FPP->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+ assert((P->getPotentialPassManagerType() <
+ RequiredPass->getPotentialPassManagerType()) &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+
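+ // Look up, or lazily create, the on-the-fly function pass manager that will
+ // run RequiredPass on behalf of module pass P.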
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
+ if (!FPP) {
+ FPP = new FunctionPassManagerImpl();
+ // FPP is the top level manager.
+ FPP->setTopLevelManager(FPP);
+
+ OnTheFlyManagers[P] = FPP;
+ }
+ FPP->add(RequiredPass);
+
+ // Register P as the last user of RequiredPass.
+ if (RequiredPass) {
+ SmallVector<Pass *, 1> LU;
+ LU.push_back(RequiredPass);
+ FPP->setLastUser(LU, P);
+ }
+}
+
+/// Return the function pass corresponding to PassInfo PI that is
+/// required by module pass MP. Instantiate the analysis pass by running
+/// it, via runOnFunction(), on function F.
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
+ assert(FPP && "Unable to find on the fly pass");
+
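+ // Re-run the on-the-fly manager on F so the requested analysis is up to
+ // date before handing it back to the module pass.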
+ FPP->releaseMemoryOnTheFly();
+ FPP->run(F);
+ return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// PassManagerImpl implementation
+
+//
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManagerImpl::run(Module &M) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+
+ dumpArguments();
+ dumpPasses();
+
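+ // Run the immutable passes' doInitialization before any contained manager.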
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doInitialization(M);
+ }
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnModule(M);
+
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// PassManager implementation
+
+/// Create new pass manager
+PassManager::PassManager() {
+ PM = new PassManagerImpl();
+ // PM is the top level manager
+ PM->setTopLevelManager(PM);
+}
+
+PassManager::~PassManager() {
+ delete PM;
+}
+
+/// add - Add a pass to the queue of passes to run. This passes ownership of
+/// the Pass to the PassManager. When the PassManager is destroyed, the pass
+/// will be destroyed as well, so there is no need to delete the pass. This
+/// implies that all passes MUST be allocated with 'new'.
+void PassManager::add(Pass *P) {
+ PM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManager::run(Module &M) {
+ return PM->run(M);
+}
+
+//===----------------------------------------------------------------------===//
+// TimingInfo implementation
+
+bool llvm::TimePassesIsEnabled = false;
+static cl::opt<bool,true>
+EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
+ cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
+// a non null value (if the -time-passes option is enabled) or it leaves it
+// null. It may be called multiple times.
+void TimingInfo::createTheTimeInfo() {
+ if (!TimePassesIsEnabled || TheTimeInfo) return;
+
+ // Constructed the first time this is called, iff -time-passes is enabled.
+ // This guarantees that the object will be constructed before static globals,
+ // thus it will be destroyed before them.
+ static ManagedStatic<TimingInfo> TTI;
+ TheTimeInfo = &*TTI;
+}
+
+/// If timing of passes is enabled, return the timer for pass P, else null.
+Timer *llvm::getPassTimer(Pass *P) {
+ if (TheTimeInfo)
+ return TheTimeInfo->getPassTimer(P);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// PMStack implementation
+//
+
+// Pop Pass Manager from the stack and clear its analysis info.
+void PMStack::pop() {
+
+ PMDataManager *Top = this->top();
+ Top->initializeAnalysisInfo();
+
+ S.pop_back();
+}
+
+// Push PM on the stack and set its top level manager.
+void PMStack::push(PMDataManager *PM) {
+ assert(PM && "Unable to push. Pass Manager expected");
+ assert(PM->getDepth()==0 && "Pass Manager depth set too early");
+
+ if (!this->empty()) {
+ assert(PM->getPassManagerType() > this->top()->getPassManagerType()
+ && "pushing bad pass manager to PMStack");
+ PMTopLevelManager *TPM = this->top()->getTopLevelManager();
+
+ assert(TPM && "Unable to find top level manager");
+ TPM->addIndirectPassManager(PM);
+ PM->setTopLevelManager(TPM);
+ PM->setDepth(this->top()->getDepth()+1);
+ } else {
+ assert((PM->getPassManagerType() == PMT_ModulePassManager
+ || PM->getPassManagerType() == PMT_FunctionPassManager)
+ && "pushing bad pass manager to PMStack");
+ PM->setDepth(1);
+ }
+
+ S.push_back(PM);
+}
+
+// Dump content of the pass manager stack.
+void PMStack::dump() const {
+ for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
+ E = S.end(); I != E; ++I)
+ dbgs() << (*I)->getAsPass()->getPassName() << ' ';
+
+ if (!S.empty())
+ dbgs() << '\n';
+}
+
+/// Find appropriate Module Pass Manager in the PM Stack and
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find Module Pass Manager
+ while (!PMS.empty()) {
+ PassManagerType TopPMType = PMS.top()->getPassManagerType();
+ if (TopPMType == PreferredType)
+ break; // We found desired pass manager
+ else if (TopPMType > PMT_ModulePassManager)
+ PMS.pop(); // Pop children pass managers
+ else
+ break;
+ }
+ assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
+ PMS.top()->add(this);
+}
+
+/// Find appropriate Function Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void FunctionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+
+ // Find Function Pass Manager
+ while (!PMS.empty()) {
+ if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
+ PMS.pop();
+ else
+ break;
+ }
+
+ // Create new Function Pass Manager if needed.
+ FPPassManager *FPP;
+ if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
+ FPP = (FPPassManager *)PMS.top();
+ } else {
+ assert(!PMS.empty() && "Unable to create Function Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Function Pass Manager
+ FPP = new FPPassManager();
+ FPP->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(FPP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ FPP->assignPassManager(PMS, PMD->getPassManagerType());
+
+ // [4] Push new manager into PMS
+ PMS.push(FPP);
+ }
+
+ // Assign FPP as the manager of this pass.
+ FPP->add(this);
+}
+
+/// Find the appropriate BasicBlock Pass Manager in the PM Stack and
+/// add self into that manager.
+void BasicBlockPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ BBPassManager *BBP;
+
+ // The BasicBlock pass manager is a leaf pass manager. It does not handle
+ // any other pass managers.
+ if (!PMS.empty() &&
+ PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
+ BBP = (BBPassManager *)PMS.top();
+ } else {
+ // If the leaf manager is not a BasicBlock pass manager, then create a
+ // new BasicBlock pass manager.
+ assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Basic Block Manager
+ BBP = new BBPassManager();
+
+ // [2] Set up new manager's top level manager
+ // Basic Block Pass Manager does not live by itself
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(BBP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ BBP->assignPassManager(PMS, PreferredType);
+
+ // [4] Push new manager into PMS
+ PMS.push(BBP);
+ }
+
+ // Assign BBP as the manager of this pass.
+ BBP->add(this);
+}
+
+PassManagerBase::~PassManagerBase() {}
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index bd4d9c0..a32d25c 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -65,7 +65,7 @@ class MDNodeOperand : public CallbackVH {
public:
MDNodeOperand(Value *V) : CallbackVH(V) {}
- ~MDNodeOperand() {}
+ virtual ~MDNodeOperand();
void set(Value *V) {
unsigned IsFirst = this->getValPtrInt();
@@ -82,6 +82,8 @@ public:
};
} // end namespace llvm.
+// Provide out-of-line definition to prevent weak vtable.
+MDNodeOperand::~MDNodeOperand() {}
void MDNodeOperand::deleted() {
getParent()->replaceOperand(this, 0);
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 968b8f4..4f240c7 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -245,7 +245,7 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) {
/// 1. If it does not exist, add a declaration of the global and return it.
/// 2. Else, the global exists but has the wrong type: return the function
/// with a constantexpr cast to the right type.
-/// 3. Finally, if the existing global is the correct delclaration, return the
+/// 3. Finally, if the existing global is the correct declaration, return the
/// existing global.
Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
// See if we have a definition for the specified global already.
@@ -260,8 +260,10 @@ Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
// If the variable exists but has the wrong type, return a bitcast to the
// right type.
- if (GV->getType() != PointerType::getUnqual(Ty))
- return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty));
+ Type *GVTy = GV->getType();
+ PointerType *PTy = PointerType::get(Ty, GVTy->getPointerAddressSpace());
+ if (GVTy != PTy)
+ return ConstantExpr::getBitCast(GV, PTy);
// Otherwise, we just found the existing function or a prototype.
return GV;
@@ -316,11 +318,16 @@ getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
for (unsigned i = 0, e = ModFlags->getNumOperands(); i != e; ++i) {
MDNode *Flag = ModFlags->getOperand(i);
- ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
- MDString *Key = cast<MDString>(Flag->getOperand(1));
- Value *Val = Flag->getOperand(2);
- Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
- Key, Val));
+ if (Flag->getNumOperands() >= 3 && isa<ConstantInt>(Flag->getOperand(0)) &&
+ isa<MDString>(Flag->getOperand(1))) {
+ // Check the operands of the MDNode before accessing the operands.
+ // The verifier will actually catch these failures.
+ ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
+ MDString *Key = cast<MDString>(Flag->getOperand(1));
+ Value *Val = Flag->getOperand(2);
+ Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
+ Key, Val));
+ }
}
}
@@ -399,9 +406,15 @@ bool Module::isDematerializable(const GlobalValue *GV) const {
}
bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
- if (Materializer)
- return Materializer->Materialize(GV, ErrInfo);
- return false;
+ if (!Materializer)
+ return false;
+
+ error_code EC = Materializer->Materialize(GV);
+ if (!EC)
+ return false;
+ if (ErrInfo)
+ *ErrInfo = EC.message();
+ return true;
}
void Module::Dematerialize(GlobalValue *GV) {
@@ -412,7 +425,12 @@ void Module::Dematerialize(GlobalValue *GV) {
bool Module::MaterializeAll(std::string *ErrInfo) {
if (!Materializer)
return false;
- return Materializer->MaterializeModule(this, ErrInfo);
+ error_code EC = Materializer->MaterializeModule(this);
+ if (!EC)
+ return false;
+ if (ErrInfo)
+ *ErrInfo = EC.message();
+ return true;
}
bool Module::MaterializeAllPermanently(std::string *ErrInfo) {
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
index ee53c85..966af7d 100644
--- a/lib/IR/PassManager.cpp
+++ b/lib/IR/PassManager.cpp
@@ -1,4 +1,4 @@
-//===- PassManager.cpp - LLVM Pass Infrastructure Implementation ----------===//
+//===- PassManager.cpp - Infrastructure for managing & running IR passes --===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,1907 +6,152 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the LLVM Pass Manager infrastructure.
-//
-//===----------------------------------------------------------------------===//
+#include "llvm/IR/PassManager.h"
+#include "llvm/ADT/STLExtras.h"
-#include "llvm/PassManagers.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/PassNameParser.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <map>
using namespace llvm;
-// See PassManagers.h for Pass Manager infrastructure overview.
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// Pass debugging information. Often it is useful to find out what pass is
-// running when a crash occurs in a utility. When this library is compiled with
-// debugging on, a command line option (--debug-pass) is enabled that causes the
-// pass name to be printed before it executes.
-//
-
-// Different debug levels that can be enabled...
-enum PassDebugLevel {
- Disabled, Arguments, Structure, Executions, Details
-};
-
-static cl::opt<enum PassDebugLevel>
-PassDebugging("debug-pass", cl::Hidden,
- cl::desc("Print PassManager debugging information"),
- cl::values(
- clEnumVal(Disabled , "disable debug output"),
- clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
- clEnumVal(Structure , "print pass structure before run()"),
- clEnumVal(Executions, "print pass name before it is executed"),
- clEnumVal(Details , "print pass details when it is executed"),
- clEnumValEnd));
-
-typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
-PassOptionList;
-
-// Print IR out before/after specified passes.
-static PassOptionList
-PrintBefore("print-before",
- llvm::cl::desc("Print IR before specified passes"),
- cl::Hidden);
-
-static PassOptionList
-PrintAfter("print-after",
- llvm::cl::desc("Print IR after specified passes"),
- cl::Hidden);
-
-static cl::opt<bool>
-PrintBeforeAll("print-before-all",
- llvm::cl::desc("Print IR before each pass"),
- cl::init(false));
-static cl::opt<bool>
-PrintAfterAll("print-after-all",
- llvm::cl::desc("Print IR after each pass"),
- cl::init(false));
-
-/// This is a helper to determine whether to print IR before or
-/// after a pass.
-
-static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
- PassOptionList &PassesToPrint) {
- for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
- const llvm::PassInfo *PassInf = PassesToPrint[i];
- if (PassInf)
- if (PassInf->getPassArgument() == PI->getPassArgument()) {
- return true;
- }
- }
- return false;
-}
-
-/// This is a utility to check whether a pass should have IR dumped
-/// before it.
-static bool ShouldPrintBeforePass(const PassInfo *PI) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
-}
-
-/// This is a utility to check whether a pass should have IR dumped
-/// after it.
-static bool ShouldPrintAfterPass(const PassInfo *PI) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
-}
-
-} // End of llvm namespace
-
-/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
-/// or higher is specified.
-bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
- return PassDebugging >= Executions;
-}
-
-
-
-
-void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
- if (V == 0 && M == 0)
- OS << "Releasing pass '";
- else
- OS << "Running pass '";
-
- OS << P->getPassName() << "'";
-
- if (M) {
- OS << " on module '" << M->getModuleIdentifier() << "'.\n";
- return;
- }
- if (V == 0) {
- OS << '\n';
- return;
- }
-
- OS << " on ";
- if (isa<Function>(V))
- OS << "function";
- else if (isa<BasicBlock>(V))
- OS << "basic block";
- else
- OS << "value";
-
- OS << " '";
- WriteAsOperand(OS, V, /*PrintTy=*/false, M);
- OS << "'\n";
+void ModulePassManager::run() {
+ for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx)
+ if (Passes[Idx]->run(M))
+ if (AM) AM->invalidateAll(M);
}
-
-namespace {
-
-//===----------------------------------------------------------------------===//
-// BBPassManager
-//
-/// BBPassManager manages BasicBlockPass. It batches all the
-/// pass together and sequence them to process one basic block before
-/// processing next basic block.
-class BBPassManager : public PMDataManager, public FunctionPass {
-
-public:
- static char ID;
- explicit BBPassManager()
- : PMDataManager(), FunctionPass(ID) {}
-
- /// Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the function, and if so, return true.
- bool runOnFunction(Function &F);
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const {
- Info.setPreservesAll();
- }
-
- bool doInitialization(Module &M);
- bool doInitialization(Function &F);
- bool doFinalization(Module &M);
- bool doFinalization(Function &F);
-
- virtual PMDataManager *getAsPMDataManager() { return this; }
- virtual Pass *getAsPass() { return this; }
-
- virtual const char *getPassName() const {
- return "BasicBlock Pass Manager";
- }
-
- // Print passes managed by this manager
- void dumpPassStructure(unsigned Offset) {
- llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n";
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- BP->dumpPassStructure(Offset + 1);
- dumpLastUses(BP, Offset+1);
- }
- }
-
- BasicBlockPass *getContainedPass(unsigned N) {
- assert(N < PassVector.size() && "Pass number out of range!");
- BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
- return BP;
- }
-
- virtual PassManagerType getPassManagerType() const {
- return PMT_BasicBlockPassManager;
- }
-};
-
-char BBPassManager::ID = 0;
-}
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// FunctionPassManagerImpl
-//
-/// FunctionPassManagerImpl manages FPPassManagers
-class FunctionPassManagerImpl : public Pass,
- public PMDataManager,
- public PMTopLevelManager {
- virtual void anchor();
-private:
- bool wasRun;
-public:
- static char ID;
- explicit FunctionPassManagerImpl() :
- Pass(PT_PassManager, ID), PMDataManager(),
- PMTopLevelManager(new FPPassManager()), wasRun(false) {}
-
- /// add - Add a pass to the queue of passes to run. This passes ownership of
- /// the Pass to the PassManager. When the PassManager is destroyed, the pass
- /// will be destroyed as well, so there is no need to delete the pass. This
- /// implies that all passes MUST be allocated with 'new'.
- void add(Pass *P) {
- schedulePass(P);
- }
-
- /// createPrinterPass - Get a function printer pass.
- Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
- return createPrintFunctionPass(Banner, &O);
- }
-
- // Prepare for running an on the fly pass, freeing memory if needed
- // from a previous run.
- void releaseMemoryOnTheFly();
-
- /// run - Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the module, and if so, return true.
- bool run(Function &F);
-
- /// doInitialization - Run all of the initializers for the function passes.
- ///
- bool doInitialization(Module &M);
-
- /// doFinalization - Run all of the finalizers for the function passes.
- ///
- bool doFinalization(Module &M);
-
-
- virtual PMDataManager *getAsPMDataManager() { return this; }
- virtual Pass *getAsPass() { return this; }
- virtual PassManagerType getTopLevelPassManagerType() {
- return PMT_FunctionPassManager;
- }
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const {
- Info.setPreservesAll();
- }
-
- FPPassManager *getContainedManager(unsigned N) {
- assert(N < PassManagers.size() && "Pass number out of range!");
- FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
- return FP;
- }
-};
-
-void FunctionPassManagerImpl::anchor() {}
-
-char FunctionPassManagerImpl::ID = 0;
-
-//===----------------------------------------------------------------------===//
-// MPPassManager
-//
-/// MPPassManager manages ModulePasses and function pass managers.
-/// It batches all Module passes and function pass managers together and
-/// sequences them to process one module.
-class MPPassManager : public Pass, public PMDataManager {
-public:
- static char ID;
- explicit MPPassManager() :
- Pass(PT_PassManager, ID), PMDataManager() { }
-
- // Delete on the fly managers.
- virtual ~MPPassManager() {
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
- delete FPP;
- }
- }
-
- /// createPrinterPass - Get a module printer pass.
- Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
- return createPrintModulePass(&O, false, Banner);
- }
-
- /// run - Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the module, and if so, return true.
- bool runOnModule(Module &M);
-
- using llvm::Pass::doInitialization;
- using llvm::Pass::doFinalization;
-
- /// doInitialization - Run all of the initializers for the module passes.
- ///
- bool doInitialization();
-
- /// doFinalization - Run all of the finalizers for the module passes.
- ///
- bool doFinalization();
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const {
- Info.setPreservesAll();
- }
-
- /// Add RequiredPass into list of lower level passes required by pass P.
- /// RequiredPass is run on the fly by Pass Manager when P requests it
- /// through getAnalysis interface.
- virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
-
- /// Return function pass corresponding to PassInfo PI, that is
- /// required by module pass MP. Instantiate analysis pass, by using
- /// its runOnFunction() for function F.
- virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
-
- virtual const char *getPassName() const {
- return "Module Pass Manager";
- }
-
- virtual PMDataManager *getAsPMDataManager() { return this; }
- virtual Pass *getAsPass() { return this; }
-
- // Print passes managed by this manager
- void dumpPassStructure(unsigned Offset) {
- llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n";
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- ModulePass *MP = getContainedPass(Index);
- MP->dumpPassStructure(Offset + 1);
- std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I =
- OnTheFlyManagers.find(MP);
- if (I != OnTheFlyManagers.end())
- I->second->dumpPassStructure(Offset + 2);
- dumpLastUses(MP, Offset+1);
- }
- }
-
- ModulePass *getContainedPass(unsigned N) {
- assert(N < PassVector.size() && "Pass number out of range!");
- return static_cast<ModulePass *>(PassVector[N]);
- }
-
- virtual PassManagerType getPassManagerType() const {
- return PMT_ModulePassManager;
- }
-
- private:
- /// Collection of on the fly FPPassManagers. These managers manage
- /// function passes that are required by module passes.
- std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers;
-};
-
-char MPPassManager::ID = 0;
-//===----------------------------------------------------------------------===//
-// PassManagerImpl
-//
-
-/// PassManagerImpl manages MPPassManagers
-class PassManagerImpl : public Pass,
- public PMDataManager,
- public PMTopLevelManager {
- virtual void anchor();
-
-public:
- static char ID;
- explicit PassManagerImpl() :
- Pass(PT_PassManager, ID), PMDataManager(),
- PMTopLevelManager(new MPPassManager()) {}
-
- /// add - Add a pass to the queue of passes to run. This passes ownership of
- /// the Pass to the PassManager. When the PassManager is destroyed, the pass
- /// will be destroyed as well, so there is no need to delete the pass. This
- /// implies that all passes MUST be allocated with 'new'.
- void add(Pass *P) {
- schedulePass(P);
- }
-
- /// createPrinterPass - Get a module printer pass.
- Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
- return createPrintModulePass(&O, false, Banner);
- }
-
- /// run - Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the module, and if so, return true.
- bool run(Module &M);
-
- using llvm::Pass::doInitialization;
- using llvm::Pass::doFinalization;
-
- /// doInitialization - Run all of the initializers for the module passes.
- ///
- bool doInitialization();
-
- /// doFinalization - Run all of the finalizers for the module passes.
- ///
- bool doFinalization();
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const {
- Info.setPreservesAll();
- }
-
- virtual PMDataManager *getAsPMDataManager() { return this; }
- virtual Pass *getAsPass() { return this; }
- virtual PassManagerType getTopLevelPassManagerType() {
- return PMT_ModulePassManager;
- }
-
- MPPassManager *getContainedManager(unsigned N) {
- assert(N < PassManagers.size() && "Pass number out of range!");
- MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]);
- return MP;
- }
-};
-
-void PassManagerImpl::anchor() {}
-
-char PassManagerImpl::ID = 0;
-} // End of llvm namespace
-
-namespace {
-
-//===----------------------------------------------------------------------===//
-/// TimingInfo Class - This class is used to calculate information about the
-/// amount of time each pass takes to execute. This only happens when
-/// -time-passes is enabled on the command line.
-///
-
-static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
-
-class TimingInfo {
- DenseMap<Pass*, Timer*> TimingData;
- TimerGroup TG;
-public:
- // Use 'create' member to get this.
- TimingInfo() : TG("... Pass execution timing report ...") {}
-
- // TimingDtor - Print out information about timing information
- ~TimingInfo() {
- // Delete all of the timers, which accumulate their info into the
- // TimerGroup.
- for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
- E = TimingData.end(); I != E; ++I)
- delete I->second;
- // TimerGroup is deleted next, printing the report.
- }
-
- // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
- // to a non null value (if the -time-passes option is enabled) or it leaves it
- // null. It may be called multiple times.
- static void createTheTimeInfo();
-
- /// getPassTimer - Return the timer for the specified pass if it exists.
- Timer *getPassTimer(Pass *P) {
- if (P->getAsPMDataManager())
- return 0;
-
- sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
- Timer *&T = TimingData[P];
- if (T == 0)
- T = new Timer(P->getPassName(), TG);
- return T;
- }
-};
-
-} // End of anon namespace
-
-static TimingInfo *TheTimeInfo;
-
-//===----------------------------------------------------------------------===//
-// PMTopLevelManager implementation
-
-/// Initialize top level manager. Create first pass manager.
-PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
- PMDM->setTopLevelManager(this);
- addPassManager(PMDM);
- activeStack.push(PMDM);
-}
-
-/// Set pass P as the last user of the given analysis passes.
-void
-PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
- unsigned PDepth = 0;
- if (P->getResolver())
- PDepth = P->getResolver()->getPMDataManager().getDepth();
-
- for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
- E = AnalysisPasses.end(); I != E; ++I) {
- Pass *AP = *I;
- LastUser[AP] = P;
-
- if (P == AP)
- continue;
-
- // Update the last users of passes that are required transitive by AP.
- AnalysisUsage *AnUsage = findAnalysisUsage(AP);
- const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
- SmallVector<Pass *, 12> LastUses;
- SmallVector<Pass *, 12> LastPMUses;
- for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
- E = IDs.end(); I != E; ++I) {
- Pass *AnalysisPass = findAnalysisPass(*I);
- assert(AnalysisPass && "Expected analysis pass to exist.");
- AnalysisResolver *AR = AnalysisPass->getResolver();
- assert(AR && "Expected analysis resolver to exist.");
- unsigned APDepth = AR->getPMDataManager().getDepth();
-
- if (PDepth == APDepth)
- LastUses.push_back(AnalysisPass);
- else if (PDepth > APDepth)
- LastPMUses.push_back(AnalysisPass);
- }
-
- setLastUser(LastUses, P);
-
- // If this pass has a corresponding pass manager, push higher level
- // analysis to this pass manager.
- if (P->getResolver())
- setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
-
-
- // If AP is the last user of other passes then make P last user of
- // such passes.
- for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
- LUE = LastUser.end(); LUI != LUE; ++LUI) {
- if (LUI->second == AP)
- // DenseMap iterator is not invalidated here because
- // this is just updating existing entries.
- LastUser[LUI->first] = P;
- }
- }
-}
-
-/// Collect passes whose last user is P
-void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
- Pass *P) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
- InversedLastUser.find(P);
- if (DMI == InversedLastUser.end())
- return;
-
- SmallPtrSet<Pass *, 8> &LU = DMI->second;
- for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
- E = LU.end(); I != E; ++I) {
- LastUses.push_back(*I);
- }
-
-}
-
-AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
- AnalysisUsage *AnUsage = NULL;
- DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
- if (DMI != AnUsageMap.end())
- AnUsage = DMI->second;
- else {
- AnUsage = new AnalysisUsage();
- P->getAnalysisUsage(*AnUsage);
- AnUsageMap[P] = AnUsage;
- }
- return AnUsage;
-}
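
The AnalysisUsage objects cached here are produced by each pass's getAnalysisUsage override. A minimal sketch of that client side, using a hypothetical MyPass (the pass name and body are illustrative only):

#include "llvm/Pass.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

namespace {
struct MyPass : public FunctionPass {        // hypothetical example pass
  static char ID;
  MyPass() : FunctionPass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<LoopInfo>();              // scheduled before this pass
    AU.setPreservesCFG();                    // declared preserved analyses
  }

  virtual bool runOnFunction(Function &F) {
    LoopInfo &LI = getAnalysis<LoopInfo>();  // resolved via the resolver
    (void)LI; (void)F;
    return false;                            // analysis-only, nothing changed
  }
};
}
char MyPass::ID = 0;
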
-
-/// Schedule pass P for execution. Make sure that passes required by
-/// P are run before P is run. Update analysis info maintained by
-/// the manager. Remove dead passes. This is a recursive function.
-void PMTopLevelManager::schedulePass(Pass *P) {
-
-  // TODO: Allocate a function manager for this pass, otherwise the required set
-  // may be inserted into the previous function manager.
-
- // Give pass a chance to prepare the stage.
- P->preparePassManager(activeStack);
-
- // If P is an analysis pass and it is available then do not
- // generate the analysis again. Stale analysis info should not be
- // available at this point.
- const PassInfo *PI =
- PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
- if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
- delete P;
- return;
- }
-
- AnalysisUsage *AnUsage = findAnalysisUsage(P);
-
- bool checkAnalysis = true;
- while (checkAnalysis) {
- checkAnalysis = false;
-
- const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
- for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
- E = RequiredSet.end(); I != E; ++I) {
-
- Pass *AnalysisPass = findAnalysisPass(*I);
- if (!AnalysisPass) {
- const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
-
- if (PI == NULL) {
- // Pass P is not in the global PassRegistry
- dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n";
- dbgs() << "Verify if there is a pass dependency cycle." << "\n";
- dbgs() << "Required Passes:" << "\n";
- for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(),
- E = RequiredSet.end(); I2 != E && I2 != I; ++I2) {
- Pass *AnalysisPass2 = findAnalysisPass(*I2);
- if (AnalysisPass2) {
- dbgs() << "\t" << AnalysisPass2->getPassName() << "\n";
- } else {
- dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n";
- dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n";
- dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n";
- }
- }
- }
-
- assert(PI && "Expected required passes to be initialized");
- AnalysisPass = PI->createPass();
- if (P->getPotentialPassManagerType () ==
- AnalysisPass->getPotentialPassManagerType())
- // Schedule analysis pass that is managed by the same pass manager.
- schedulePass(AnalysisPass);
- else if (P->getPotentialPassManagerType () >
- AnalysisPass->getPotentialPassManagerType()) {
- // Schedule analysis pass that is managed by a new manager.
- schedulePass(AnalysisPass);
- // Recheck analysis passes to ensure that required analyses that
- // are already checked are still available.
- checkAnalysis = true;
- } else
-        // Do not schedule this analysis. Lower level analysis
-        // passes are run on the fly.
- delete AnalysisPass;
+bool FunctionPassManager::run(Module *M) {
+ bool Changed = false;
+ for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
+ for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx)
+ if (Passes[Idx]->run(I)) {
+ Changed = true;
+ if (AM) AM->invalidateAll(I);
}
- }
- }
-
- // Now all required passes are available.
- if (ImmutablePass *IP = P->getAsImmutablePass()) {
-    // P is an immutable pass and it will be managed by this
- // top level manager. Set up analysis resolver to connect them.
- PMDataManager *DM = getAsPMDataManager();
- AnalysisResolver *AR = new AnalysisResolver(*DM);
- P->setResolver(AR);
- DM->initializeAnalysisImpl(P);
- addImmutablePass(IP);
- DM->recordAvailableAnalysis(IP);
- return;
- }
-
- if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
- Pass *PP = P->createPrinterPass(
- dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***");
- PP->assignPassManager(activeStack, getTopLevelPassManagerType());
- }
-
- // Add the requested pass to the best available pass manager.
- P->assignPassManager(activeStack, getTopLevelPassManagerType());
-
- if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
- Pass *PP = P->createPrinterPass(
- dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***");
- PP->assignPassManager(activeStack, getTopLevelPassManagerType());
- }
-}
-
-/// Find the pass that implements Analysis AID. Search immutable
-/// passes and all pass managers. If desired pass is not found
-/// then return NULL.
-Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
-
- // Check pass managers
- for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- if (Pass *P = (*I)->findAnalysisPass(AID, false))
- return P;
-
- // Check other pass managers
- for (SmallVectorImpl<PMDataManager *>::iterator
- I = IndirectPassManagers.begin(),
- E = IndirectPassManagers.end(); I != E; ++I)
- if (Pass *P = (*I)->findAnalysisPass(AID, false))
- return P;
-
- // Check the immutable passes. Iterate in reverse order so that we find
- // the most recently registered passes first.
- for (SmallVectorImpl<ImmutablePass *>::reverse_iterator I =
- ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
- AnalysisID PI = (*I)->getPassID();
- if (PI == AID)
- return *I;
-
-    // If the pass was not found, check the interfaces implemented by the immutable pass.
- const PassInfo *PassInf =
- PassRegistry::getPassRegistry()->getPassInfo(PI);
- assert(PassInf && "Expected all immutable passes to be initialized");
- const std::vector<const PassInfo*> &ImmPI =
- PassInf->getInterfacesImplemented();
- for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
- EE = ImmPI.end(); II != EE; ++II) {
- if ((*II)->getTypeInfo() == AID)
- return *I;
- }
- }
-
- return 0;
-}
-
-// Print passes managed by this top level manager.
-void PMTopLevelManager::dumpPasses() const {
-
- if (PassDebugging < Structure)
- return;
-
- // Print out the immutable passes
- for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
- ImmutablePasses[i]->dumpPassStructure(0);
- }
-
- // Every class that derives from PMDataManager also derives from Pass
- // (sometimes indirectly), but there's no inheritance relationship
- // between PMDataManager and Pass, so we have to getAsPass to get
- // from a PMDataManager* to a Pass*.
- for (SmallVectorImpl<PMDataManager *>::const_iterator I =
- PassManagers.begin(), E = PassManagers.end(); I != E; ++I)
- (*I)->getAsPass()->dumpPassStructure(1);
-}
-
-void PMTopLevelManager::dumpArguments() const {
-
- if (PassDebugging < Arguments)
- return;
-
- dbgs() << "Pass Arguments: ";
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I =
- ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
- if (const PassInfo *PI =
- PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
- assert(PI && "Expected all immutable passes to be initialized");
- if (!PI->isAnalysisGroup())
- dbgs() << " -" << PI->getPassArgument();
- }
- for (SmallVectorImpl<PMDataManager *>::const_iterator I =
- PassManagers.begin(), E = PassManagers.end(); I != E; ++I)
- (*I)->dumpPassArguments();
- dbgs() << "\n";
+ return Changed;
}
-void PMTopLevelManager::initializeAllAnalysisInfo() {
- for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- (*I)->initializeAnalysisInfo();
+void AnalysisManager::invalidateAll(Function *F) {
+ assert(F->getParent() == M && "Invalidating a function from another module!");
-  // Initialize other pass managers
- for (SmallVectorImpl<PMDataManager *>::iterator
- I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+  // First invalidate any module results we still have lying around.
+ // FIXME: This is a total hack based on the fact that erasure doesn't
+ // invalidate iteration for DenseMap.
+ for (ModuleAnalysisResultMapT::iterator I = ModuleAnalysisResults.begin(),
+ E = ModuleAnalysisResults.end();
I != E; ++I)
- (*I)->initializeAnalysisInfo();
-
- for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
- DME = LastUser.end(); DMI != DME; ++DMI) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
- InversedLastUser.find(DMI->second);
- if (InvDMI != InversedLastUser.end()) {
- SmallPtrSet<Pass *, 8> &L = InvDMI->second;
- L.insert(DMI->first);
+ if (I->second->invalidate(M))
+ ModuleAnalysisResults.erase(I);
+
+ // Now clear all the invalidated results associated specifically with this
+ // function.
+ SmallVector<void *, 8> InvalidatedPassIDs;
+ FunctionAnalysisResultListT &ResultsList = FunctionAnalysisResultLists[F];
+ for (FunctionAnalysisResultListT::iterator I = ResultsList.begin(),
+ E = ResultsList.end();
+ I != E;)
+ if (I->second->invalidate(F)) {
+ InvalidatedPassIDs.push_back(I->first);
+ I = ResultsList.erase(I);
} else {
- SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
- InversedLastUser[DMI->second] = L;
+ ++I;
}
- }
-}
-
-/// Destructor
-PMTopLevelManager::~PMTopLevelManager() {
- for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- delete *I;
-
- for (SmallVectorImpl<ImmutablePass *>::iterator
- I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
- delete *I;
-
- for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
- DME = AnUsageMap.end(); DMI != DME; ++DMI)
- delete DMI->second;
-}
-
-//===----------------------------------------------------------------------===//
-// PMDataManager implementation
-
-/// Augment AvailableAnalysis by adding analysis made available by pass P.
-void PMDataManager::recordAvailableAnalysis(Pass *P) {
- AnalysisID PI = P->getPassID();
-
- AvailableAnalysis[PI] = P;
-
- assert(!AvailableAnalysis.empty());
-
- // This pass is the current implementation of all of the interfaces it
- // implements as well.
- const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
- if (PInf == 0) return;
- const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
- for (unsigned i = 0, e = II.size(); i != e; ++i)
- AvailableAnalysis[II[i]->getTypeInfo()] = P;
+ while (!InvalidatedPassIDs.empty())
+ FunctionAnalysisResults.erase(
+ std::make_pair(InvalidatedPassIDs.pop_back_val(), F));
}
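
The loop above has a deliberate two-phase shape: invalidated results are unlinked from the per-function list immediately, but the (PassID, Function) index entries are erased only afterwards, so neither container is mutated while the other is being walked. A standalone sketch of the same pattern, with stand-in types in place of the real analysis machinery:

#include <list>
#include <map>
#include <utility>
#include <vector>

typedef std::list<std::pair<void *, bool> > ResultList;   // bool: still valid?
typedef std::map<std::pair<void *, int *>, ResultList::iterator> ResultIndex;

static void invalidateFunction(int *F, ResultList &Results, ResultIndex &Index) {
  std::vector<void *> DeadIDs;
  for (ResultList::iterator I = Results.begin(), E = Results.end(); I != E;)
    if (!I->second) {                 // stand-in for Result->invalidate(F)
      DeadIDs.push_back(I->first);
      I = Results.erase(I);
    } else {
      ++I;
    }
  while (!DeadIDs.empty()) {
    Index.erase(std::make_pair(DeadIDs.back(), F));
    DeadIDs.pop_back();
  }
}
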
-// Return true if P preserves high level analysis used by other
-// passes managed by this manager
-bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- if (AnUsage->getPreservesAll())
- return true;
-
- const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
- for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
- E = HigherLevelAnalysis.end(); I != E; ++I) {
- Pass *P1 = *I;
- if (P1->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(),
- P1->getPassID()) ==
- PreservedSet.end())
- return false;
- }
-
- return true;
-}
-
-/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
-void PMDataManager::verifyPreservedAnalysis(Pass *P) {
- // Don't do this unless assertions are enabled.
-#ifdef NDEBUG
- return;
-#endif
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
-
- // Verify preserved analysis
- for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
- E = PreservedSet.end(); I != E; ++I) {
- AnalysisID AID = *I;
- if (Pass *AP = findAnalysisPass(AID, true)) {
- TimeRegion PassTimer(getPassTimer(AP));
- AP->verifyAnalysis();
- }
- }
-}
-
-/// Remove Analysis not preserved by Pass P
-void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- if (AnUsage->getPreservesAll())
- return;
-
- const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
- for (DenseMap<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
- E = AvailableAnalysis.end(); I != E; ) {
- DenseMap<AnalysisID, Pass*>::iterator Info = I++;
- if (Info->second->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
- PreservedSet.end()) {
- // Remove this analysis
- if (PassDebugging >= Details) {
- Pass *S = Info->second;
- dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
- dbgs() << S->getPassName() << "'\n";
- }
- AvailableAnalysis.erase(Info);
- }
- }
-
-  // Also check inherited analyses. If P does not preserve an analysis
-  // provided by a parent manager, remove it here.
- for (unsigned Index = 0; Index < PMT_Last; ++Index) {
-
- if (!InheritedAnalysis[Index])
- continue;
-
- for (DenseMap<AnalysisID, Pass*>::iterator
- I = InheritedAnalysis[Index]->begin(),
- E = InheritedAnalysis[Index]->end(); I != E; ) {
- DenseMap<AnalysisID, Pass *>::iterator Info = I++;
- if (Info->second->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
- PreservedSet.end()) {
- // Remove this analysis
- if (PassDebugging >= Details) {
- Pass *S = Info->second;
- dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
- dbgs() << S->getPassName() << "'\n";
- }
- InheritedAnalysis[Index]->erase(Info);
- }
- }
- }
-}
-
-/// Remove analysis passes that are not used any longer
-void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
- enum PassDebuggingString DBG_STR) {
-
- SmallVector<Pass *, 12> DeadPasses;
-
-  // If this is an on-the-fly manager then it does not have a TPM.
- if (!TPM)
- return;
-
- TPM->collectLastUses(DeadPasses, P);
-
- if (PassDebugging >= Details && !DeadPasses.empty()) {
- dbgs() << " -*- '" << P->getPassName();
- dbgs() << "' is the last user of following pass instances.";
- dbgs() << " Free these instances\n";
- }
-
- for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
- E = DeadPasses.end(); I != E; ++I)
- freePass(*I, Msg, DBG_STR);
-}
-
-void PMDataManager::freePass(Pass *P, StringRef Msg,
- enum PassDebuggingString DBG_STR) {
- dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
-
- {
- // If the pass crashes releasing memory, remember this.
- PassManagerPrettyStackEntry X(P);
- TimeRegion PassTimer(getPassTimer(P));
-
- P->releaseMemory();
- }
-
- AnalysisID PI = P->getPassID();
- if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
- // Remove the pass itself (if it is not already removed).
- AvailableAnalysis.erase(PI);
-
- // Remove all interfaces this pass implements, for which it is also
- // listed as the available implementation.
- const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
- for (unsigned i = 0, e = II.size(); i != e; ++i) {
- DenseMap<AnalysisID, Pass*>::iterator Pos =
- AvailableAnalysis.find(II[i]->getTypeInfo());
- if (Pos != AvailableAnalysis.end() && Pos->second == P)
- AvailableAnalysis.erase(Pos);
- }
- }
-}
-
-/// Add pass P into the PassVector. Update
-/// AvailableAnalysis appropriately if ProcessAnalysis is true.
-void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
- // This manager is going to manage pass P. Set up analysis resolver
- // to connect them.
- AnalysisResolver *AR = new AnalysisResolver(*this);
- P->setResolver(AR);
-
- // If a FunctionPass F is the last user of ModulePass info M
-  // then F's manager, not F, records itself as the last user of M.
- SmallVector<Pass *, 12> TransferLastUses;
-
- if (!ProcessAnalysis) {
- // Add pass
- PassVector.push_back(P);
- return;
- }
-
- // At the moment, this pass is the last user of all required passes.
- SmallVector<Pass *, 12> LastUses;
- SmallVector<Pass *, 8> RequiredPasses;
- SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
-
- unsigned PDepth = this->getDepth();
-
- collectRequiredAnalysis(RequiredPasses,
- ReqAnalysisNotAvailable, P);
- for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
- E = RequiredPasses.end(); I != E; ++I) {
- Pass *PRequired = *I;
- unsigned RDepth = 0;
-
- assert(PRequired->getResolver() && "Analysis Resolver is not set");
- PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
- RDepth = DM.getDepth();
-
- if (PDepth == RDepth)
- LastUses.push_back(PRequired);
- else if (PDepth > RDepth) {
- // Let the parent claim responsibility of last use
- TransferLastUses.push_back(PRequired);
- // Keep track of higher level analysis used by this manager.
- HigherLevelAnalysis.push_back(PRequired);
- } else
- llvm_unreachable("Unable to accommodate Required Pass");
- }
-
- // Set P as P's last user until someone starts using P.
- // However, if P is a Pass Manager then it does not need
- // to record its last user.
- if (P->getAsPMDataManager() == 0)
- LastUses.push_back(P);
- TPM->setLastUser(LastUses, P);
-
- if (!TransferLastUses.empty()) {
- Pass *My_PM = getAsPass();
- TPM->setLastUser(TransferLastUses, My_PM);
- TransferLastUses.clear();
- }
-
- // Now, take care of required analyses that are not available.
- for (SmallVectorImpl<AnalysisID>::iterator
- I = ReqAnalysisNotAvailable.begin(),
- E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
- const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
- Pass *AnalysisPass = PI->createPass();
- this->addLowerLevelRequiredPass(P, AnalysisPass);
- }
-
- // Take a note of analysis required and made available by this pass.
- // Remove the analysis not preserved by this pass
- removeNotPreservedAnalysis(P);
- recordAvailableAnalysis(P);
-
- // Add pass
- PassVector.push_back(P);
-}
-
-
-/// Populate RP with analysis passes that are required by
-/// pass P and are available. Populate RP_NotAvail with analysis
-/// passes that are required by pass P but are not available.
-void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
- SmallVectorImpl<AnalysisID> &RP_NotAvail,
- Pass *P) {
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
- for (AnalysisUsage::VectorType::const_iterator
- I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
- if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
- else
- RP_NotAvail.push_back(*I);
- }
-
- const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
- for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
- E = IDs.end(); I != E; ++I) {
- if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
- else
- RP_NotAvail.push_back(*I);
- }
-}
-
-// All Required analyses should be available to the pass as it runs! Here
-// we fill in the AnalysisImpls member of the pass so that it can
-// successfully use the getAnalysis() method to retrieve the
-// implementations it needs.
-//
-void PMDataManager::initializeAnalysisImpl(Pass *P) {
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
-
- for (AnalysisUsage::VectorType::const_iterator
- I = AnUsage->getRequiredSet().begin(),
- E = AnUsage->getRequiredSet().end(); I != E; ++I) {
- Pass *Impl = findAnalysisPass(*I, true);
- if (Impl == 0)
-      // This may be an analysis pass that is initialized on the fly.
- // If that is not the case then it will raise an assert when it is used.
- continue;
- AnalysisResolver *AR = P->getResolver();
- assert(AR && "Analysis Resolver is not set");
- AR->addAnalysisImplsPair(*I, Impl);
- }
-}
-
-/// Find the pass that implements Analysis AID. If desired pass is not found
-/// then return NULL.
-Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
-
- // Check if AvailableAnalysis map has one entry.
- DenseMap<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID);
-
- if (I != AvailableAnalysis.end())
- return I->second;
-
- // Search Parents through TopLevelManager
- if (SearchParent)
- return TPM->findAnalysisPass(AID);
-
- return NULL;
-}
-
-// Print list of passes that are last used by P.
-void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
-
- SmallVector<Pass *, 12> LUses;
-
-  // If this is an on-the-fly manager then it does not have a TPM.
- if (!TPM)
- return;
-
- TPM->collectLastUses(LUses, P);
-
- for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
- E = LUses.end(); I != E; ++I) {
- llvm::dbgs() << "--" << std::string(Offset*2, ' ');
- (*I)->dumpPassStructure(0);
- }
-}
-
-void PMDataManager::dumpPassArguments() const {
- for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
- E = PassVector.end(); I != E; ++I) {
- if (PMDataManager *PMD = (*I)->getAsPMDataManager())
- PMD->dumpPassArguments();
- else
- if (const PassInfo *PI =
- PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
- if (!PI->isAnalysisGroup())
- dbgs() << " -" << PI->getPassArgument();
- }
-}
-
-void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
- enum PassDebuggingString S2,
- StringRef Msg) {
- if (PassDebugging < Executions)
- return;
- dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
- switch (S1) {
- case EXECUTION_MSG:
- dbgs() << "Executing Pass '" << P->getPassName();
- break;
- case MODIFICATION_MSG:
- dbgs() << "Made Modification '" << P->getPassName();
- break;
- case FREEING_MSG:
- dbgs() << " Freeing Pass '" << P->getPassName();
- break;
- default:
- break;
- }
- switch (S2) {
- case ON_BASICBLOCK_MSG:
- dbgs() << "' on BasicBlock '" << Msg << "'...\n";
- break;
- case ON_FUNCTION_MSG:
- dbgs() << "' on Function '" << Msg << "'...\n";
- break;
- case ON_MODULE_MSG:
- dbgs() << "' on Module '" << Msg << "'...\n";
- break;
- case ON_REGION_MSG:
- dbgs() << "' on Region '" << Msg << "'...\n";
- break;
- case ON_LOOP_MSG:
- dbgs() << "' on Loop '" << Msg << "'...\n";
- break;
- case ON_CG_MSG:
- dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n";
- break;
- default:
- break;
- }
-}
-
-void PMDataManager::dumpRequiredSet(const Pass *P) const {
- if (PassDebugging < Details)
- return;
-
- AnalysisUsage analysisUsage;
- P->getAnalysisUsage(analysisUsage);
- dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
-}
-
-void PMDataManager::dumpPreservedSet(const Pass *P) const {
- if (PassDebugging < Details)
- return;
-
- AnalysisUsage analysisUsage;
- P->getAnalysisUsage(analysisUsage);
- dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
-}
-
-void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
- const AnalysisUsage::VectorType &Set) const {
- assert(PassDebugging >= Details);
- if (Set.empty())
- return;
- dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
- for (unsigned i = 0; i != Set.size(); ++i) {
- if (i) dbgs() << ',';
- const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
- if (!PInf) {
- // Some preserved passes, such as AliasAnalysis, may not be initialized by
- // all drivers.
- dbgs() << " Uninitialized Pass";
- continue;
- }
- dbgs() << ' ' << PInf->getPassName();
- }
- dbgs() << '\n';
-}
-
-/// Add RequiredPass into list of lower level passes required by pass P.
-/// RequiredPass is run on the fly by Pass Manager when P requests it
-/// through getAnalysis interface.
-/// This should be handled by specific pass manager.
-void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
- if (TPM) {
- TPM->dumpArguments();
- TPM->dumpPasses();
- }
-
-  // A module-level pass may require function-level analysis info
-  // (e.g. dominator info). The pass manager uses an on-the-fly function pass
-  // manager to provide this on demand. In pass manager terminology, the
-  // module-level pass is requiring lower-level analysis info managed by a
-  // lower-level pass manager.
-
-  // When the pass manager is not able to order the required analysis info, it
-  // checks whether any lower-level manager will be able to provide this
-  // analysis info on demand.
-#ifndef NDEBUG
- dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
- dbgs() << "' required by '" << P->getPassName() << "'\n";
-#endif
- llvm_unreachable("Unable to schedule pass");
-}
-
-Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
- llvm_unreachable("Unable to find on the fly pass");
-}
-
-// Destructor
-PMDataManager::~PMDataManager() {
- for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
- E = PassVector.end(); I != E; ++I)
- delete *I;
-}
-
-//===----------------------------------------------------------------------===//
-// NOTE: Is this the right place to define this method?
-// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
-Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
- return PM.findAnalysisPass(ID, dir);
-}
-
-Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
- Function &F) {
- return PM.getOnTheFlyPass(P, AnalysisPI, F);
-}
-
-//===----------------------------------------------------------------------===//
-// BBPassManager implementation
-
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnBasicBlock method. Keep track of whether any of the passes modifies
-/// the function, and if so, return true.
-bool BBPassManager::runOnFunction(Function &F) {
- if (F.isDeclaration())
- return false;
-
- bool Changed = doInitialization(F);
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- bool LocalChanged = false;
-
- dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
- dumpRequiredSet(BP);
-
- initializeAnalysisImpl(BP);
-
- {
- // If the pass crashes, remember this.
- PassManagerPrettyStackEntry X(BP, *I);
- TimeRegion PassTimer(getPassTimer(BP));
-
- LocalChanged |= BP->runOnBasicBlock(*I);
+void AnalysisManager::invalidateAll(Module *M) {
+  // First invalidate any module results we still have lying around.
+ // FIXME: This is a total hack based on the fact that erasure doesn't
+ // invalidate iteration for DenseMap.
+ for (ModuleAnalysisResultMapT::iterator I = ModuleAnalysisResults.begin(),
+ E = ModuleAnalysisResults.end();
+ I != E; ++I)
+ if (I->second->invalidate(M))
+ ModuleAnalysisResults.erase(I);
+
+ // Now walk all of the functions for which there are cached results, and
+ // attempt to invalidate each of those as the entire module may have changed.
+ // FIXME: How do we handle functions which have been deleted or RAUWed?
+ SmallVector<void *, 8> InvalidatedPassIDs;
+ for (FunctionAnalysisResultListMapT::iterator
+ FI = FunctionAnalysisResultLists.begin(),
+ FE = FunctionAnalysisResultLists.end();
+ FI != FE; ++FI) {
+ Function *F = FI->first;
+ FunctionAnalysisResultListT &ResultsList = FI->second;
+ for (FunctionAnalysisResultListT::iterator I = ResultsList.begin(),
+ E = ResultsList.end();
+ I != E;)
+ if (I->second->invalidate(F)) {
+ InvalidatedPassIDs.push_back(I->first);
+ I = ResultsList.erase(I);
+ } else {
+ ++I;
}
-
- Changed |= LocalChanged;
- if (LocalChanged)
- dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
- I->getName());
- dumpPreservedSet(BP);
-
- verifyPreservedAnalysis(BP);
- removeNotPreservedAnalysis(BP);
- recordAvailableAnalysis(BP);
- removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
- }
-
- return doFinalization(F) || Changed;
-}
-
-// Implement doInitialization and doFinalization
-bool BBPassManager::doInitialization(Module &M) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
- Changed |= getContainedPass(Index)->doInitialization(M);
-
- return Changed;
-}
-
-bool BBPassManager::doFinalization(Module &M) {
- bool Changed = false;
-
- for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
- Changed |= getContainedPass(Index)->doFinalization(M);
-
- return Changed;
-}
-
-bool BBPassManager::doInitialization(Function &F) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- Changed |= BP->doInitialization(F);
+ while (!InvalidatedPassIDs.empty())
+ FunctionAnalysisResults.erase(
+ std::make_pair(InvalidatedPassIDs.pop_back_val(), F));
}
-
- return Changed;
}
-bool BBPassManager::doFinalization(Function &F) {
- bool Changed = false;
+const AnalysisManager::AnalysisResultConcept<Module> &
+AnalysisManager::getResultImpl(void *PassID, Module *M) {
+ assert(M == this->M && "Wrong module used when querying the AnalysisManager");
+ ModuleAnalysisResultMapT::iterator RI;
+ bool Inserted;
+ llvm::tie(RI, Inserted) = ModuleAnalysisResults.insert(std::make_pair(
+ PassID, polymorphic_ptr<AnalysisResultConcept<Module> >()));
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- Changed |= BP->doFinalization(F);
+ if (Inserted) {
+    // We don't have a cached result for this pass. Look up the pass and run
+ // it to produce a result, which we then add to the cache.
+ ModuleAnalysisPassMapT::const_iterator PI =
+ ModuleAnalysisPasses.find(PassID);
+ assert(PI != ModuleAnalysisPasses.end() &&
+ "Analysis passes must be registered prior to being queried!");
+ RI->second = PI->second->run(M);
}
- return Changed;
+ return *RI->second;
}
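
The caching idiom above, reduced to its essentials: insert a placeholder first, and let the returned bool say whether the result still has to be computed, so a single lookup serves both the hit and the miss path. A toy version with std::map:

#include <map>
#include <utility>

static int &getOrCompute(std::map<int, int> &Cache, int Key) {
  std::pair<std::map<int, int>::iterator, bool> R =
      Cache.insert(std::make_pair(Key, 0));
  if (R.second)                      // freshly inserted: compute and store
    R.first->second = Key * Key;     // stand-in for "run the analysis pass"
  return R.first->second;
}
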
+const AnalysisManager::AnalysisResultConcept<Function> &
+AnalysisManager::getResultImpl(void *PassID, Function *F) {
+ assert(F->getParent() == M && "Analyzing a function from another module!");
-//===----------------------------------------------------------------------===//
-// FunctionPassManager implementation
-
-/// Create new Function pass manager
-FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
- FPM = new FunctionPassManagerImpl();
- // FPM is the top level manager.
- FPM->setTopLevelManager(FPM);
-
- AnalysisResolver *AR = new AnalysisResolver(*FPM);
- FPM->setResolver(AR);
-}
-
-FunctionPassManager::~FunctionPassManager() {
- delete FPM;
-}
+ FunctionAnalysisResultMapT::iterator RI;
+ bool Inserted;
+ llvm::tie(RI, Inserted) = FunctionAnalysisResults.insert(std::make_pair(
+ std::make_pair(PassID, F), FunctionAnalysisResultListT::iterator()));
-/// add - Add a pass to the queue of passes to run. This passes
-/// ownership of the Pass to the PassManager. When the
-/// PassManager_X is destroyed, the pass will be destroyed as well, so
-/// there is no need to delete the pass. (TODO delete passes.)
-/// This implies that all passes MUST be allocated with 'new'.
-void FunctionPassManager::add(Pass *P) {
- FPM->add(P);
-}
-
-/// run - Execute all of the passes scheduled for execution. Keep
-/// track of whether any of the passes modifies the function, and if
-/// so, return true.
-///
-bool FunctionPassManager::run(Function &F) {
- if (F.isMaterializable()) {
- std::string errstr;
- if (F.Materialize(&errstr))
- report_fatal_error("Error reading bitcode file: " + Twine(errstr));
+ if (Inserted) {
+    // We don't have a cached result for this pass. Look up the pass and run
+ // it to produce a result, which we then add to the cache.
+ FunctionAnalysisPassMapT::const_iterator PI =
+ FunctionAnalysisPasses.find(PassID);
+ assert(PI != FunctionAnalysisPasses.end() &&
+ "Analysis passes must be registered prior to being queried!");
+ FunctionAnalysisResultListT &ResultList = FunctionAnalysisResultLists[F];
+ ResultList.push_back(std::make_pair(PassID, PI->second->run(F)));
+ RI->second = llvm::prior(ResultList.end());
}
- return FPM->run(F);
-}
-
-/// doInitialization - Run all of the initializers for the function passes.
-///
-bool FunctionPassManager::doInitialization() {
- return FPM->doInitialization(*M);
-}
-
-/// doFinalization - Run all of the finalizers for the function passes.
-///
-bool FunctionPassManager::doFinalization() {
- return FPM->doFinalization(*M);
-}
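
Typical driver-side use of the wrapper defined here, sketched under the assumption that P is a heap-allocated FunctionPass handed over to the manager:

#include "llvm/PassManager.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static bool optimizeModule(Module &M, Pass *P) {
  FunctionPassManager FPM(&M);
  FPM.add(P);                                   // FPM takes ownership of P
  bool Changed = FPM.doInitialization();
  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
    if (!F->isDeclaration())
      Changed |= FPM.run(*F);
  return FPM.doFinalization() || Changed;
}
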
-
-//===----------------------------------------------------------------------===//
-// FunctionPassManagerImpl implementation
-//
-bool FunctionPassManagerImpl::doInitialization(Module &M) {
- bool Changed = false;
-
- dumpArguments();
- dumpPasses();
-
- SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
- E = IPV.end(); I != E; ++I) {
- Changed |= (*I)->doInitialization(M);
- }
-
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- Changed |= getContainedManager(Index)->doInitialization(M);
-
- return Changed;
+ return *RI->second->second;
}
-bool FunctionPassManagerImpl::doFinalization(Module &M) {
- bool Changed = false;
-
- for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index)
- Changed |= getContainedManager(Index)->doFinalization(M);
-
- SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
- E = IPV.end(); I != E; ++I) {
- Changed |= (*I)->doFinalization(M);
- }
-
- return Changed;
+void AnalysisManager::invalidateImpl(void *PassID, Module *M) {
+ assert(M == this->M && "Invalidating a pass over a different module!");
+ ModuleAnalysisResults.erase(PassID);
}
-/// cleanup - After running all passes, clean up pass manager cache.
-void FPPassManager::cleanup() {
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- FunctionPass *FP = getContainedPass(Index);
- AnalysisResolver *AR = FP->getResolver();
- assert(AR && "Analysis Resolver is not set");
- AR->clearAnalysisImpls();
- }
-}
+void AnalysisManager::invalidateImpl(void *PassID, Function *F) {
+ assert(F->getParent() == M &&
+ "Invalidating a pass over a function from another module!");
-void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
- if (!wasRun)
+ FunctionAnalysisResultMapT::iterator RI =
+ FunctionAnalysisResults.find(std::make_pair(PassID, F));
+ if (RI == FunctionAnalysisResults.end())
return;
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
- FPPassManager *FPPM = getContainedManager(Index);
- for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) {
- FPPM->getContainedPass(Index)->releaseMemory();
- }
- }
- wasRun = false;
-}
-
-// Execute all the passes managed by this top level manager.
-// Return true if any function is modified by a pass.
-bool FunctionPassManagerImpl::run(Function &F) {
- bool Changed = false;
- TimingInfo::createTheTimeInfo();
-
- initializeAllAnalysisInfo();
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- Changed |= getContainedManager(Index)->runOnFunction(F);
-
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- getContainedManager(Index)->cleanup();
-
- wasRun = true;
- return Changed;
-}
-//===----------------------------------------------------------------------===//
-// FPPassManager implementation
-
-char FPPassManager::ID = 0;
-/// Print passes managed by this manager
-void FPPassManager::dumpPassStructure(unsigned Offset) {
- dbgs().indent(Offset*2) << "FunctionPass Manager\n";
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- FunctionPass *FP = getContainedPass(Index);
- FP->dumpPassStructure(Offset + 1);
- dumpLastUses(FP, Offset+1);
- }
-}
-
-
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnFunction method. Keep track of whether any of the passes modifies
-/// the function, and if so, return true.
-bool FPPassManager::runOnFunction(Function &F) {
- if (F.isDeclaration())
- return false;
-
- bool Changed = false;
-
- // Collect inherited analysis from Module level pass manager.
- populateInheritedAnalysis(TPM->activeStack);
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- FunctionPass *FP = getContainedPass(Index);
- bool LocalChanged = false;
-
- dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
- dumpRequiredSet(FP);
-
- initializeAnalysisImpl(FP);
-
- {
- PassManagerPrettyStackEntry X(FP, F);
- TimeRegion PassTimer(getPassTimer(FP));
-
- LocalChanged |= FP->runOnFunction(F);
- }
-
- Changed |= LocalChanged;
- if (LocalChanged)
- dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
- dumpPreservedSet(FP);
-
- verifyPreservedAnalysis(FP);
- removeNotPreservedAnalysis(FP);
- recordAvailableAnalysis(FP);
- removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
- }
- return Changed;
-}
-
-bool FPPassManager::runOnModule(Module &M) {
- bool Changed = false;
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- Changed |= runOnFunction(*I);
-
- return Changed;
-}
-
-bool FPPassManager::doInitialization(Module &M) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
- Changed |= getContainedPass(Index)->doInitialization(M);
-
- return Changed;
-}
-
-bool FPPassManager::doFinalization(Module &M) {
- bool Changed = false;
-
- for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
- Changed |= getContainedPass(Index)->doFinalization(M);
-
- return Changed;
+ FunctionAnalysisResultLists[F].erase(RI->second);
}
-
-//===----------------------------------------------------------------------===//
-// MPPassManager implementation
-
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnModule method. Keep track of whether any of the passes modifies
-/// the module, and if so, return true.
-bool
-MPPassManager::runOnModule(Module &M) {
- bool Changed = false;
-
- // Initialize on-the-fly passes
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
- Changed |= FPP->doInitialization(M);
- }
-
- // Initialize module passes
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
- Changed |= getContainedPass(Index)->doInitialization(M);
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- ModulePass *MP = getContainedPass(Index);
- bool LocalChanged = false;
-
- dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
- dumpRequiredSet(MP);
-
- initializeAnalysisImpl(MP);
-
- {
- PassManagerPrettyStackEntry X(MP, M);
- TimeRegion PassTimer(getPassTimer(MP));
-
- LocalChanged |= MP->runOnModule(M);
- }
-
- Changed |= LocalChanged;
- if (LocalChanged)
- dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
- M.getModuleIdentifier());
- dumpPreservedSet(MP);
-
- verifyPreservedAnalysis(MP);
- removeNotPreservedAnalysis(MP);
- recordAvailableAnalysis(MP);
- removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
- }
-
- // Finalize module passes
- for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
- Changed |= getContainedPass(Index)->doFinalization(M);
-
- // Finalize on-the-fly passes
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
-    // We don't know when an on-the-fly pass is run for the last time,
-    // so we need to releaseMemory / finalize here.
- FPP->releaseMemoryOnTheFly();
- Changed |= FPP->doFinalization(M);
- }
-
- return Changed;
-}
-
-/// Add RequiredPass into list of lower level passes required by pass P.
-/// RequiredPass is run on the fly by Pass Manager when P requests it
-/// through getAnalysis interface.
-void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
- assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
- "Unable to handle Pass that requires lower level Analysis pass");
- assert((P->getPotentialPassManagerType() <
- RequiredPass->getPotentialPassManagerType()) &&
- "Unable to handle Pass that requires lower level Analysis pass");
-
- FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
- if (!FPP) {
- FPP = new FunctionPassManagerImpl();
- // FPP is the top level manager.
- FPP->setTopLevelManager(FPP);
-
- OnTheFlyManagers[P] = FPP;
- }
- FPP->add(RequiredPass);
-
- // Register P as the last user of RequiredPass.
- if (RequiredPass) {
- SmallVector<Pass *, 1> LU;
- LU.push_back(RequiredPass);
- FPP->setLastUser(LU, P);
- }
-}
-
-/// Return the function pass corresponding to PassInfo PI that is required by
-/// module pass MP. Instantiate the analysis pass by running its
-/// runOnFunction() on function F.
-Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
- FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
- assert(FPP && "Unable to find on the fly pass");
-
- FPP->releaseMemoryOnTheFly();
- FPP->run(F);
- return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
-}
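
What this looks like from the module pass's side, sketched with a hypothetical CountLoops pass: requiring a function-level analysis and then calling getAnalysis with an explicit Function argument is exactly the request that routes through getOnTheFlyPass above.

#include "llvm/Pass.h"
#include "llvm/IR/Module.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

namespace {
struct CountLoops : public ModulePass {        // hypothetical example pass
  static char ID;
  CountLoops() : ModulePass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<LoopInfo>();                // a function-level analysis
    AU.setPreservesAll();
  }

  virtual bool runOnModule(Module &M) {
    unsigned TopLevelLoops = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
      if (!F->isDeclaration()) {
        // Triggers the on-the-fly manager: LoopInfo is computed for F now.
        LoopInfo &LI = getAnalysis<LoopInfo>(*F);
        TopLevelLoops += LI.end() - LI.begin();
      }
    (void)TopLevelLoops;
    return false;
  }
};
}
char CountLoops::ID = 0;
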
-
-
-//===----------------------------------------------------------------------===//
-// PassManagerImpl implementation
-
-//
-/// run - Execute all of the passes scheduled for execution. Keep track of
-/// whether any of the passes modifies the module, and if so, return true.
-bool PassManagerImpl::run(Module &M) {
- bool Changed = false;
- TimingInfo::createTheTimeInfo();
-
- dumpArguments();
- dumpPasses();
-
- SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
- E = IPV.end(); I != E; ++I) {
- Changed |= (*I)->doInitialization(M);
- }
-
- initializeAllAnalysisInfo();
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- Changed |= getContainedManager(Index)->runOnModule(M);
-
- for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
- E = IPV.end(); I != E; ++I) {
- Changed |= (*I)->doFinalization(M);
- }
-
- return Changed;
-}
-
-//===----------------------------------------------------------------------===//
-// PassManager implementation
-
-/// Create new pass manager
-PassManager::PassManager() {
- PM = new PassManagerImpl();
- // PM is the top level manager
- PM->setTopLevelManager(PM);
-}
-
-PassManager::~PassManager() {
- delete PM;
-}
-
-/// add - Add a pass to the queue of passes to run. This passes ownership of
-/// the Pass to the PassManager. When the PassManager is destroyed, the pass
-/// will be destroyed as well, so there is no need to delete the pass. This
-/// implies that all passes MUST be allocated with 'new'.
-void PassManager::add(Pass *P) {
- PM->add(P);
-}
-
-/// run - Execute all of the passes scheduled for execution. Keep track of
-/// whether any of the passes modifies the module, and if so, return true.
-bool PassManager::run(Module &M) {
- return PM->run(M);
-}
-
-//===----------------------------------------------------------------------===//
-// TimingInfo implementation
-
-bool llvm::TimePassesIsEnabled = false;
-static cl::opt<bool,true>
-EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
- cl::desc("Time each pass, printing elapsed time for each on exit"));
-
-// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
-// a non null value (if the -time-passes option is enabled) or it leaves it
-// null. It may be called multiple times.
-void TimingInfo::createTheTimeInfo() {
- if (!TimePassesIsEnabled || TheTimeInfo) return;
-
- // Constructed the first time this is called, iff -time-passes is enabled.
- // This guarantees that the object will be constructed before static globals,
- // thus it will be destroyed before them.
- static ManagedStatic<TimingInfo> TTI;
- TheTimeInfo = &*TTI;
-}
-
-/// If TimingInfo is enabled then start pass timer.
-Timer *llvm::getPassTimer(Pass *P) {
- if (TheTimeInfo)
- return TheTimeInfo->getPassTimer(P);
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// PMStack implementation
-//
-
-// Pop Pass Manager from the stack and clear its analysis info.
-void PMStack::pop() {
-
- PMDataManager *Top = this->top();
- Top->initializeAnalysisInfo();
-
- S.pop_back();
-}
-
-// Push PM on the stack and set its top level manager.
-void PMStack::push(PMDataManager *PM) {
- assert(PM && "Unable to push. Pass Manager expected");
- assert(PM->getDepth()==0 && "Pass Manager depth set too early");
-
- if (!this->empty()) {
- assert(PM->getPassManagerType() > this->top()->getPassManagerType()
- && "pushing bad pass manager to PMStack");
- PMTopLevelManager *TPM = this->top()->getTopLevelManager();
-
- assert(TPM && "Unable to find top level manager");
- TPM->addIndirectPassManager(PM);
- PM->setTopLevelManager(TPM);
- PM->setDepth(this->top()->getDepth()+1);
- } else {
- assert((PM->getPassManagerType() == PMT_ModulePassManager
- || PM->getPassManagerType() == PMT_FunctionPassManager)
- && "pushing bad pass manager to PMStack");
- PM->setDepth(1);
- }
-
- S.push_back(PM);
-}
-
-// Dump content of the pass manager stack.
-void PMStack::dump() const {
- for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
- E = S.end(); I != E; ++I)
- dbgs() << (*I)->getAsPass()->getPassName() << ' ';
-
- if (!S.empty())
- dbgs() << '\n';
-}
-
-/// Find appropriate Module Pass Manager in the PM Stack and
-/// add self into that manager.
-void ModulePass::assignPassManager(PMStack &PMS,
- PassManagerType PreferredType) {
- // Find Module Pass Manager
- while (!PMS.empty()) {
- PassManagerType TopPMType = PMS.top()->getPassManagerType();
- if (TopPMType == PreferredType)
- break; // We found desired pass manager
- else if (TopPMType > PMT_ModulePassManager)
- PMS.pop(); // Pop children pass managers
- else
- break;
- }
- assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
- PMS.top()->add(this);
-}
-
-/// Find appropriate Function Pass Manager or Call Graph Pass Manager
-/// in the PM Stack and add self into that manager.
-void FunctionPass::assignPassManager(PMStack &PMS,
- PassManagerType PreferredType) {
-
- // Find Function Pass Manager
- while (!PMS.empty()) {
- if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
- PMS.pop();
- else
- break;
- }
-
- // Create new Function Pass Manager if needed.
- FPPassManager *FPP;
- if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
- FPP = (FPPassManager *)PMS.top();
- } else {
- assert(!PMS.empty() && "Unable to create Function Pass Manager");
- PMDataManager *PMD = PMS.top();
-
- // [1] Create new Function Pass Manager
- FPP = new FPPassManager();
- FPP->populateInheritedAnalysis(PMS);
-
- // [2] Set up new manager's top level manager
- PMTopLevelManager *TPM = PMD->getTopLevelManager();
- TPM->addIndirectPassManager(FPP);
-
- // [3] Assign manager to manage this new manager. This may create
- // and push new managers into PMS
- FPP->assignPassManager(PMS, PMD->getPassManagerType());
-
- // [4] Push new manager into PMS
- PMS.push(FPP);
- }
-
- // Assign FPP as the manager of this pass.
- FPP->add(this);
-}
-
-/// Find an appropriate BasicBlock Pass Manager in the PM Stack
-/// and add self into that manager.
-void BasicBlockPass::assignPassManager(PMStack &PMS,
- PassManagerType PreferredType) {
- BBPassManager *BBP;
-
- // Basic Pass Manager is a leaf pass manager. It does not handle
- // any other pass manager.
- if (!PMS.empty() &&
- PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
- BBP = (BBPassManager *)PMS.top();
- } else {
- // If leaf manager is not Basic Block Pass manager then create new
- // basic Block Pass manager.
- assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
- PMDataManager *PMD = PMS.top();
-
- // [1] Create new Basic Block Manager
- BBP = new BBPassManager();
-
- // [2] Set up new manager's top level manager
- // Basic Block Pass Manager does not live by itself
- PMTopLevelManager *TPM = PMD->getTopLevelManager();
- TPM->addIndirectPassManager(BBP);
-
- // [3] Assign manager to manage this new manager. This may create
- // and push new managers into PMS
- BBP->assignPassManager(PMS, PreferredType);
-
- // [4] Push new manager into PMS
- PMS.push(BBP);
- }
-
- // Assign BBP as the manager of this pass.
- BBP->add(this);
-}
-
-PassManagerBase::~PassManagerBase() {}
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 46c61fc..432cbc9 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -616,11 +616,7 @@ bool StructType::isLayoutIdentical(StructType *Other) const {
/// getTypeByName - Return the type with the specified name, or null if there
/// is none by that name.
StructType *Module::getTypeByName(StringRef Name) const {
- StringMap<StructType*>::iterator I =
- getContext().pImpl->NamedStructTypes.find(Name);
- if (I != getContext().pImpl->NamedStructTypes.end())
- return I->second;
- return 0;
+ return getContext().pImpl->NamedStructTypes.lookup(Name);
}
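
The reason the simplification above is behavior-preserving: StringMap::lookup returns the mapped value when the key is present and a value-initialized T (a null pointer here) when it is not, so the find()/end()/dereference dance is unnecessary. A tiny illustration:

#include "llvm/ADT/StringMap.h"
using namespace llvm;

static bool lookupDemo() {
  static int Sentinel;
  StringMap<int *> Map;
  Map["present"] = &Sentinel;
  int *Hit  = Map.lookup("present");  // == &Sentinel
  int *Miss = Map.lookup("absent");   // == 0, no explicit miss check needed
  return Hit == &Sentinel && Miss == 0;
}
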
diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp
index d5e6203..689b903 100644
--- a/lib/IR/TypeFinder.cpp
+++ b/lib/IR/TypeFinder.cpp
@@ -44,6 +44,9 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
incorporateType(FI->getType());
+ if (FI->hasPrefixData())
+ incorporateValue(FI->getPrefixData());
+
// First incorporate the arguments.
for (Function::const_arg_iterator AI = FI->arg_begin(),
AE = FI->arg_end(); AI != AE; ++AI)
@@ -91,19 +94,27 @@ void TypeFinder::clear() {
/// incorporateType - This method adds the type to the list of used structures
/// if it's not in there already.
void TypeFinder::incorporateType(Type *Ty) {
- // Check to see if we're already visited this type.
+ // Check to see if we've already visited this type.
if (!VisitedTypes.insert(Ty).second)
return;
- // If this is a structure or opaque type, add a name for the type.
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!OnlyNamed || STy->hasName())
- StructTypes.push_back(STy);
-
- // Recursively walk all contained types.
- for (Type::subtype_iterator I = Ty->subtype_begin(),
- E = Ty->subtype_end(); I != E; ++I)
- incorporateType(*I);
+ SmallVector<Type *, 4> TypeWorklist;
+ TypeWorklist.push_back(Ty);
+ do {
+ Ty = TypeWorklist.pop_back_val();
+
+ // If this is a structure or opaque type, add a name for the type.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!OnlyNamed || STy->hasName())
+ StructTypes.push_back(STy);
+
+ // Add all unvisited subtypes to worklist for processing
+ for (Type::subtype_reverse_iterator I = Ty->subtype_rbegin(),
+ E = Ty->subtype_rend();
+ I != E; ++I)
+ if (VisitedTypes.insert(*I).second)
+ TypeWorklist.push_back(*I);
+ } while (!TypeWorklist.empty());
}
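
The recursion-to-worklist rewrite above follows a standard shape: a node is marked visited when it is pushed, not when it is popped, so nothing is queued twice and deep type graphs cannot overflow the call stack. A generic sketch of that pattern:

#include <cstddef>
#include <set>
#include <vector>

struct Node { std::vector<Node *> Subs; };      // stand-in for llvm::Type

static void visitAll(Node *Root, std::set<Node *> &Visited) {
  std::vector<Node *> Worklist;
  if (Visited.insert(Root).second)
    Worklist.push_back(Root);
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    // ... process N here ...
    for (size_t i = N->Subs.size(); i-- != 0;)  // reverse, like subtype_rbegin
      if (Visited.insert(N->Subs[i]).second)
        Worklist.push_back(N->Subs[i]);
  }
}
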
/// incorporateValue - This method is used to walk operand lists finding types
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 81d7efa..62a3b31 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -365,7 +365,8 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
break;
}
V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+ Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
V = cast<Operator>(V)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (StripKind == PSK_ZeroIndices || GA->mayBeOverridden())
@@ -393,6 +394,42 @@ Value *Value::stripInBoundsConstantOffsets() {
return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
}
+Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
+ APInt &Offset) {
+ if (!getType()->isPointerTy())
+ return this;
+
+ assert(Offset.getBitWidth() == DL.getPointerSizeInBits(cast<PointerType>(
+ getType())->getAddressSpace()) &&
+ "The offset must have exactly as many bits as our pointer.");
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(this);
+ Value *V = this;
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds())
+ return V;
+ APInt GEPOffset(Offset);
+ if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+ return V;
+ Offset = GEPOffset;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ return V;
+}
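
A sketch of how a client might call the new helper: size the APInt to the pointer's width (as the assertion demands), strip the in-bounds GEPs, bitcasts and aliases, and read back the accumulated byte offset. The wrapper name is illustrative only.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static Value *baseAndOffset(Value *Ptr, const DataLayout &DL, int64_t &Bytes) {
  unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
  APInt Offset(DL.getPointerSizeInBits(AS), 0);   // width must match pointer
  Value *Base = Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
  Bytes = Offset.getSExtValue();                  // constant byte offset
  return Base;                                    // underlying object
}
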
+
Value *Value::stripInBoundsOffsets() {
return stripPointerCastsAndOffsets<PSK_InBounds>(this);
}
@@ -698,9 +735,5 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
#endif
}
-// Default implementation for CallbackVH.
-void CallbackVH::allUsesReplacedWith(Value *) {}
-
-void CallbackVH::deleted() {
- setValPtr(NULL);
-}
+// Pin the vtable to this file.
+void CallbackVH::anchor() {}
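
For readers unfamiliar with the idiom adopted above: an "anchor" is a deliberately trivial out-of-line virtual method whose only job is to give the class a key function, so the vtable and type info are emitted in one translation unit instead of every file that includes the header. A minimal sketch with a hypothetical Widget class:

// Widget.h (hypothetical)
struct Widget {
  virtual ~Widget() {}
  virtual void anchor();             // declared but deliberately not inline
  virtual int size() const { return 0; }
};

// Widget.cpp (hypothetical)
void Widget::anchor() {}             // key function: vtable emitted here only
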
diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp
index ba04d60..2d4da95 100644
--- a/lib/IR/ValueTypes.cpp
+++ b/lib/IR/ValueTypes.cpp
@@ -134,6 +134,7 @@ std::string EVT::getEVTString() const {
case MVT::v16i1: return "v16i1";
case MVT::v32i1: return "v32i1";
case MVT::v64i1: return "v64i1";
+ case MVT::v1i8: return "v1i8";
case MVT::v2i8: return "v2i8";
case MVT::v4i8: return "v4i8";
case MVT::v8i8: return "v8i8";
@@ -156,11 +157,15 @@ std::string EVT::getEVTString() const {
case MVT::v4i64: return "v4i64";
case MVT::v8i64: return "v8i64";
case MVT::v16i64: return "v16i64";
+ case MVT::v1f32: return "v1f32";
case MVT::v2f32: return "v2f32";
case MVT::v2f16: return "v2f16";
+ case MVT::v4f16: return "v4f16";
+ case MVT::v8f16: return "v8f16";
case MVT::v4f32: return "v4f32";
case MVT::v8f32: return "v8f32";
case MVT::v16f32: return "v16f32";
+ case MVT::v1f64: return "v1f64";
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::v8f64: return "v8f64";
@@ -197,6 +202,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
@@ -220,10 +226,14 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
+ case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1);
case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 0eda97f..da6b573 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -78,7 +78,7 @@
using namespace llvm;
static cl::opt<bool> DisableDebugInfoVerifier("disable-debug-info-verifier",
- cl::init(false));
+ cl::init(true));
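
For readers tracking the default flip above: cl::init supplies the value used when the option is absent from the command line, and passing -disable-debug-info-verifier=false still re-enables the checks. A small sketch of the same flag shape, with a hypothetical option name:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool> MyFlag("my-flag",                 // hypothetical option
                            cl::desc("Example boolean flag"),
                            cl::init(true));           // default when absent

bool myFlagValue() { return MyFlag; }                  // implicit bool access
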
namespace { // Anonymous namespace for class
struct PreVerifier : public FunctionPass {
@@ -167,7 +167,6 @@ namespace {
bool doInitialization(Module &M) {
Mod = &M;
Context = &M.getContext();
- Finder.reset();
DL = getAnalysisIfAvailable<DataLayout>();
@@ -183,10 +182,15 @@ namespace {
Mod = F.getParent();
if (!Context) Context = &F.getContext();
+ Finder.reset();
visit(F);
InstsInThisBlock.clear();
PersonalityFn = 0;
+ if (!DisableDebugInfoVerifier)
+ // Verify Debug Info.
+ verifyDebugInfo();
+
// We must abort before returning back to the pass manager, or else the
// pass manager may try to run other passes on the broken module.
return abortIfBroken();
@@ -214,9 +218,14 @@ namespace {
visitNamedMDNode(*I);
visitModuleFlags(M);
+ visitModuleIdents(M);
- // Verify Debug Info.
- verifyDebugInfo(M);
+ if (!DisableDebugInfoVerifier) {
+ Finder.reset();
+ Finder.processModule(M);
+ // Verify Debug Info.
+ verifyDebugInfo();
+ }
// If the module is broken, abort at this time.
return abortIfBroken();
@@ -258,6 +267,7 @@ namespace {
void visitGlobalAlias(GlobalAlias &GA);
void visitNamedMDNode(NamedMDNode &NMD);
void visitMDNode(MDNode &MD, Function *F);
+ void visitModuleIdents(Module &M);
void visitModuleFlags(Module &M);
void visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*> &SeenIDs,
SmallVectorImpl<MDNode*> &Requirements);
@@ -279,6 +289,7 @@ namespace {
void visitIntToPtrInst(IntToPtrInst &I);
void visitPtrToIntInst(PtrToIntInst &I);
void visitBitCastInst(BitCastInst &I);
+ void visitAddrSpaceCastInst(AddrSpaceCastInst &I);
void visitPHINode(PHINode &PN);
void visitBinaryOperator(BinaryOperator &B);
void visitICmpInst(ICmpInst &IC);
@@ -317,6 +328,8 @@ namespace {
bool VerifyIntrinsicType(Type *Ty,
ArrayRef<Intrinsic::IITDescriptor> &Infos,
SmallVectorImpl<Type*> &ArgTys);
+ bool VerifyIntrinsicIsVarArg(bool isVarArg,
+ ArrayRef<Intrinsic::IITDescriptor> &Infos);
bool VerifyAttributeCount(AttributeSet Attrs, unsigned Params);
void VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
bool isFunction, const Value *V);
@@ -328,7 +341,7 @@ namespace {
void VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy);
void VerifyConstantExprBitcastType(const ConstantExpr *CE);
- void verifyDebugInfo(Module &M);
+ void verifyDebugInfo();
void WriteValue(const Value *V) {
if (!V) return;
@@ -427,10 +440,6 @@ void Verifier::visitGlobalValue(GlobalValue &GV) {
Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
"Only global arrays can have appending linkage!", GVar);
}
-
- Assert1(!GV.hasLinkOnceODRAutoHideLinkage() || GV.hasDefaultVisibility(),
- "linkonce_odr_auto_hide can only have default visibility!",
- &GV);
}
void Verifier::visitGlobalVariable(GlobalVariable &GV) {
@@ -527,8 +536,7 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) {
void Verifier::visitGlobalAlias(GlobalAlias &GA) {
Assert1(!GA.getName().empty(),
"Alias name cannot be empty!", &GA);
- Assert1(GA.hasExternalLinkage() || GA.hasLocalLinkage() ||
- GA.hasWeakLinkage(),
+ Assert1(GlobalAlias::isValidLinkage(GA.getLinkage()),
"Alias should have external or external weak linkage!", &GA);
Assert1(GA.getAliasee(),
"Aliasee cannot be NULL!", &GA);
@@ -612,6 +620,24 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
}
}
+void Verifier::visitModuleIdents(Module &M) {
+ const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident");
+ if (!Idents)
+ return;
+
+  // llvm.ident takes a list of metadata entries. Each entry has exactly one string.
+ // Scan each llvm.ident entry and make sure that this requirement is met.
+ for (unsigned i = 0, e = Idents->getNumOperands(); i != e; ++i) {
+ const MDNode *N = Idents->getOperand(i);
+ Assert1(N->getNumOperands() == 1,
+ "incorrect number of operands in llvm.ident metadata", N);
+ Assert1(isa<MDString>(N->getOperand(0)),
+ ("invalid value for llvm.ident metadata entry operand"
+ "(the operand should be a string)"),
+ N->getOperand(0));
+ }
+}
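
For reference, a minimal producer-side sketch (not part of this patch) of metadata that satisfies the new check: each operand of llvm.ident must be an MDNode wrapping exactly one MDString. The helper name and producer string are illustrative only.

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Hypothetical helper: record a producer string in the form visitModuleIdents expects.
static void addIdentString(llvm::Module &M, llvm::StringRef Producer) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::NamedMDNode *Idents = M.getOrInsertNamedMetadata("llvm.ident");
  llvm::Value *Ops[] = { llvm::MDString::get(Ctx, Producer) }; // exactly one string
  Idents->addOperand(llvm::MDNode::get(Ctx, Ops));             // one MDNode per entry
}
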
+
void Verifier::visitModuleFlags(Module &M) {
const NamedMDNode *Flags = M.getModuleFlagsMetadata();
if (!Flags) return;
@@ -751,7 +777,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
I->getKindAsEnum() == Attribute::NoDuplicate ||
I->getKindAsEnum() == Attribute::Builtin ||
I->getKindAsEnum() == Attribute::NoBuiltin ||
- I->getKindAsEnum() == Attribute::Cold) {
+ I->getKindAsEnum() == Attribute::Cold ||
+ I->getKindAsEnum() == Attribute::OptimizeNone) {
if (!isFunction) {
CheckFailed("Attribute '" + I->getAsString() +
"' only applies to functions!", V);
@@ -897,6 +924,21 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
Attrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::AlwaysInline)),
"Attributes 'noinline and alwaysinline' are incompatible!", V);
+
+ if (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeNone)) {
+ Assert1(Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoInline),
+ "Attribute 'optnone' requires 'noinline'!", V);
+
+ Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize),
+ "Attributes 'optsize and optnone' are incompatible!", V);
+
+ Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize),
+ "Attributes 'minsize and optnone' are incompatible!", V);
+ }
}
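
As an illustration of the attribute combination these checks enforce, a small sketch (hypothetical helper, not part of this patch) that marks a function in a way the verifier accepts:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Hypothetical helper: 'optnone' must be paired with 'noinline' and must not be
// combined with 'optsize' or 'minsize', so only these two attributes are added.
static void markOptNone(llvm::Function &F) {
  F.addFnAttr(llvm::Attribute::OptimizeNone);
  F.addFnAttr(llvm::Attribute::NoInline);
}
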
void Verifier::VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy) {
@@ -930,11 +972,9 @@ void Verifier::VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy) {
unsigned SrcAS = SrcTy->getPointerAddressSpace();
unsigned DstAS = DestTy->getPointerAddressSpace();
- unsigned SrcASSize = DL->getPointerSizeInBits(SrcAS);
- unsigned DstASSize = DL->getPointerSizeInBits(DstAS);
- Assert1(SrcASSize == DstASSize,
- "Bitcasts between pointers of different address spaces must have "
- "the same size pointers, otherwise use PtrToInt/IntToPtr.", V);
+ Assert1(SrcAS == DstAS,
+ "Bitcasts between pointers of different address spaces is not legal."
+ "Use AddrSpaceCast instead.", V);
}
void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) {
@@ -1152,27 +1192,12 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
// Check to make sure that all of the constants in the switch instruction
// have the same type as the switched-on value.
Type *SwitchTy = SI.getCondition()->getType();
- IntegerType *IntTy = cast<IntegerType>(SwitchTy);
- IntegersSubsetToBB Mapping;
- std::map<IntegersSubset::Range, unsigned> RangeSetMap;
+ SmallPtrSet<ConstantInt*, 32> Constants;
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
- IntegersSubset CaseRanges = i.getCaseValueEx();
- for (unsigned ri = 0, rie = CaseRanges.getNumItems(); ri < rie; ++ri) {
- IntegersSubset::Range r = CaseRanges.getItem(ri);
- Assert1(((const APInt&)r.getLow()).getBitWidth() == IntTy->getBitWidth(),
- "Switch constants must all be same type as switch value!", &SI);
- Assert1(((const APInt&)r.getHigh()).getBitWidth() == IntTy->getBitWidth(),
- "Switch constants must all be same type as switch value!", &SI);
- Mapping.add(r);
- RangeSetMap[r] = i.getCaseIndex();
- }
- }
-
- IntegersSubsetToBB::RangeIterator errItem;
- if (!Mapping.verify(errItem)) {
- unsigned CaseIndex = RangeSetMap[errItem->first];
- SwitchInst::CaseIt i(&SI, CaseIndex);
- Assert2(false, "Duplicate integer as switch case", &SI, i.getCaseValueEx());
+ Assert1(i.getCaseValue()->getType() == SwitchTy,
+ "Switch constants must all be same type as switch value!", &SI);
+ Assert2(Constants.insert(i.getCaseValue()),
+ "Duplicate integer as switch case", &SI, i.getCaseValue());
}
visitTerminatorInst(SI);
@@ -1435,6 +1460,22 @@ void Verifier::visitBitCastInst(BitCastInst &I) {
visitInstruction(I);
}
+void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ Assert1(SrcTy->isPtrOrPtrVectorTy(),
+ "AddrSpaceCast source must be a pointer", &I);
+ Assert1(DestTy->isPtrOrPtrVectorTy(),
+ "AddrSpaceCast result must be a pointer", &I);
+ Assert1(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
+ "AddrSpaceCast must be between different address spaces", &I);
+ if (SrcTy->isVectorTy())
+ Assert1(SrcTy->getVectorNumElements() == DestTy->getVectorNumElements(),
+ "AddrSpaceCast vector pointer number of elements mismatch", &I);
+ visitInstruction(I);
+}
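
For illustration, a sketch (not part of this patch) of construction code that the new checks accept; the helper name is hypothetical and it expects a scalar pointer operand:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"

// Hypothetical helper: cast Ptr into address space 1. The pointee type is kept,
// and the source and destination address spaces differ, so the rules above hold.
static llvm::Instruction *castToAddrSpace1(llvm::Value *Ptr,
                                           llvm::Instruction *InsertPt) {
  llvm::Type *EltTy = Ptr->getType()->getPointerElementType();
  llvm::Type *DstTy = llvm::PointerType::get(EltTy, /*AddressSpace=*/1);
  return llvm::CastInst::Create(llvm::Instruction::AddrSpaceCast,
                                Ptr, DstTy, "ptr.as1", InsertPt);
}
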
+
/// visitPHINode - Ensure that a PHI node is well formed.
///
void Verifier::visitPHINode(PHINode &PN) {
@@ -1537,14 +1578,6 @@ void Verifier::VerifyCallSite(CallSite CS) {
"Function has metadata parameter but isn't an intrinsic", I);
}
- // If the call site has the 'builtin' attribute, verify that it's applied to a
- // direct call to a function with the 'nobuiltin' attribute.
- if (CS.hasFnAttr(Attribute::Builtin))
- Assert1(CS.getCalledFunction() &&
- CS.getCalledFunction()->hasFnAttribute(Attribute::NoBuiltin),
- "Attribute 'builtin' can only be used in a call to a function with "
- "the 'nobuiltin' attribute.", I);
-
visitInstruction(*I);
}
@@ -2098,7 +2131,7 @@ void Verifier::visitInstruction(Instruction &I) {
if (!DisableDebugInfoVerifier) {
MD = I.getMetadata(LLVMContext::MD_dbg);
- Finder.processLocation(DILocation(MD));
+ Finder.processLocation(*Mod, DILocation(MD));
}
InstsInThisBlock.insert(&I);
@@ -2121,6 +2154,7 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
switch (D.Kind) {
case IITDescriptor::Void: return !Ty->isVoidTy();
+ case IITDescriptor::VarArg: return true;
case IITDescriptor::MMX: return !Ty->isX86_MMXTy();
case IITDescriptor::Metadata: return !Ty->isMetadataTy();
case IITDescriptor::Half: return !Ty->isHalfTy();
@@ -2185,6 +2219,33 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
llvm_unreachable("unhandled");
}
+/// \brief Verify if the intrinsic has variable arguments.
+/// This method is intended to be called after all the fixed arguments have been
+/// verified.
+///
+/// This method returns true on error and does not print an error message.
+bool
+Verifier::VerifyIntrinsicIsVarArg(bool isVarArg,
+ ArrayRef<Intrinsic::IITDescriptor> &Infos) {
+ using namespace Intrinsic;
+
+ // If there are no descriptors left, then it can't be a vararg.
+ if (Infos.empty())
+    return isVarArg;
+
+ // There should be only one descriptor remaining at this point.
+ if (Infos.size() != 1)
+ return true;
+
+ // Check and verify the descriptor.
+ IITDescriptor D = Infos.front();
+ Infos = Infos.slice(1);
+ if (D.Kind == IITDescriptor::VarArg)
+    return !isVarArg;
+
+ return true;
+}
+
/// visitIntrinsicFunction - Allow intrinsics to be verified in different ways.
///
void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
@@ -2195,7 +2256,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
// Verify that the intrinsic prototype lines up with what the .td files
// describe.
FunctionType *IFTy = IF->getFunctionType();
- Assert1(!IFTy->isVarArg(), "Intrinsic prototypes are not varargs", IF);
+ bool IsVarArg = IFTy->isVarArg();
SmallVector<Intrinsic::IITDescriptor, 8> Table;
getIntrinsicInfoTableEntries(ID, Table);
@@ -2207,6 +2268,16 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i)
Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys),
"Intrinsic has incorrect argument type!", IF);
+
+ // Verify if the intrinsic call matches the vararg property.
+ if (IsVarArg)
+ Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
+ "Intrinsic was not defined with variable arguments!", IF);
+ else
+ Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
+ "Callsite was not defined with variable arguments!", IF);
+
+ // All descriptors should be absorbed by now.
Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF);
// Now that we have the intrinsic ID and the actual argument types (and we
@@ -2238,13 +2309,13 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
Assert1(MD->getNumOperands() == 1,
"invalid llvm.dbg.declare intrinsic call 2", &CI);
if (!DisableDebugInfoVerifier)
- Finder.processDeclare(cast<DbgDeclareInst>(&CI));
+ Finder.processDeclare(*Mod, cast<DbgDeclareInst>(&CI));
} break;
case Intrinsic::dbg_value: { //llvm.dbg.value
if (!DisableDebugInfoVerifier) {
Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
"invalid llvm.dbg.value intrinsic call 1", &CI);
- Finder.processValue(cast<DbgValueInst>(&CI));
+ Finder.processValue(*Mod, cast<DbgValueInst>(&CI));
}
break;
}
@@ -2309,11 +2380,9 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
}
}
-void Verifier::verifyDebugInfo(Module &M) {
+void Verifier::verifyDebugInfo() {
// Verify Debug Info.
if (!DisableDebugInfoVerifier) {
- Finder.processModule(M);
-
for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
E = Finder.compile_unit_end(); I != E; ++I)
Assert1(DICompileUnit(*I).Verify(), "DICompileUnit does not Verify!", *I);
@@ -2352,6 +2421,7 @@ bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) {
FunctionPassManager FPM(F.getParent());
Verifier *V = new Verifier(action);
FPM.add(V);
+ FPM.doInitialization();
FPM.run(F);
return V->Broken;
}
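
A caller-side sketch (not part of this patch) showing the public entry points this function and verifyModule expose; F and M are assumed to exist:

#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

// Hypothetical helper: run the verifier, printing diagnostics instead of aborting.
// Both entry points return true if the IR is broken.
static bool checkIR(const llvm::Function &F, const llvm::Module &M) {
  bool BadFn  = llvm::verifyFunction(F, llvm::PrintMessageAction);
  bool BadMod = llvm::verifyModule(M, llvm::PrintMessageAction);
  return BadFn || BadMod;
}
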
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
index 656fe18..935e81d 100644
--- a/lib/IRReader/IRReader.cpp
+++ b/lib/IRReader/IRReader.cpp
@@ -11,10 +11,15 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Assembly/Parser.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/IRReader.h"
using namespace llvm;
@@ -87,3 +92,30 @@ Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
return ParseIR(File.take(), Err, Context);
}
+
+//===----------------------------------------------------------------------===//
+// C API.
+//===----------------------------------------------------------------------===//
+
+LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
+ char **OutMessage) {
+ SMDiagnostic Diag;
+
+ *OutM = wrap(ParseIR(unwrap(MemBuf), Diag, *unwrap(ContextRef)));
+
+  if (!*OutM) {
+ if (OutMessage) {
+ std::string buf;
+ raw_string_ostream os(buf);
+
+ Diag.print(NULL, os, false);
+ os.flush();
+
+ *OutMessage = strdup(buf.c_str());
+ }
+ return 1;
+ }
+
+ return 0;
+}
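
A minimal client sketch (not part of this patch) for the new C API entry point; the input file name is illustrative. The call consumes the memory buffer, and on error the message string comes from strdup() above, so plain free() is appropriate:

#include <cstdio>
#include <cstdlib>
#include "llvm-c/Core.h"
#include "llvm-c/IRReader.h"

int main() {
  char *Msg = 0;
  LLVMMemoryBufferRef Buf;
  if (LLVMCreateMemoryBufferWithContentsOfFile("input.ll", &Buf, &Msg)) {
    std::fprintf(stderr, "cannot open file: %s\n", Msg);
    LLVMDisposeMessage(Msg);
    return 1;
  }

  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMModuleRef Mod;
  if (LLVMParseIRInContext(Ctx, Buf, &Mod, &Msg)) { // Buf is consumed here
    std::fprintf(stderr, "parse error: %s\n", Msg);
    std::free(Msg);
    LLVMContextDispose(Ctx);
    return 1;
  }

  LLVMDumpModule(Mod);
  LLVMDisposeModule(Mod);
  LLVMContextDispose(Ctx);
  return 0;
}
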
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index ff288bc..00280c8 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Analysis AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR IRReader MC Object Option Support TableGen Target Transforms
+subdirectories = Analysis AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR IRReader LTO MC Object Option Support TableGen Target Transforms
[component_0]
type = Group
diff --git a/lib/LTO/CMakeLists.txt b/lib/LTO/CMakeLists.txt
new file mode 100644
index 0000000..8e00bcb
--- /dev/null
+++ b/lib/LTO/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMLTO
+ LTOModule.cpp
+ LTOCodeGenerator.cpp
+ )
diff --git a/lib/LTO/LLVMBuild.txt b/lib/LTO/LLVMBuild.txt
new file mode 100644
index 0000000..38c1170
--- /dev/null
+++ b/lib/LTO/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/LTO/LLVMBuild.txt ----------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = LTO
+parent = Libraries
+required_libraries = Analysis BitReader BitWriter Core IPO Linker MC MCParser Scalar Support Target Vectorize \ No newline at end of file
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
new file mode 100644
index 0000000..2b3648e
--- /dev/null
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -0,0 +1,521 @@
+//===-LTOCodeGenerator.cpp - LLVM Link Time Optimizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Link Time Optimization library. This library is
+// intended to be used by a linker to optimize code at link time.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/LTOCodeGenerator.h"
+#include "llvm/LTO/LTOModule.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Linker.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/ObjCARC.h"
+using namespace llvm;
+
+const char* LTOCodeGenerator::getVersionString() {
+#ifdef LLVM_VERSION_INFO
+ return PACKAGE_NAME " version " PACKAGE_VERSION ", " LLVM_VERSION_INFO;
+#else
+ return PACKAGE_NAME " version " PACKAGE_VERSION;
+#endif
+}
+
+LTOCodeGenerator::LTOCodeGenerator()
+ : Context(getGlobalContext()), Linker(new Module("ld-temp.o", Context)),
+ TargetMach(NULL), EmitDwarfDebugInfo(false), ScopeRestrictionsDone(false),
+ CodeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC), NativeObjectFile(NULL) {
+ initializeLTOPasses();
+}
+
+LTOCodeGenerator::~LTOCodeGenerator() {
+ delete TargetMach;
+ delete NativeObjectFile;
+ TargetMach = NULL;
+ NativeObjectFile = NULL;
+
+ Linker.deleteModule();
+
+ for (std::vector<char *>::iterator I = CodegenOptions.begin(),
+ E = CodegenOptions.end();
+ I != E; ++I)
+ free(*I);
+}
+
+// Initialize LTO passes. Please keep this function in sync with
+// PassManagerBuilder::populateLTOPassManager(), and make sure all LTO
+// passes are initialized.
+//
+void LTOCodeGenerator::initializeLTOPasses() {
+ PassRegistry &R = *PassRegistry::getPassRegistry();
+
+ initializeInternalizePassPass(R);
+ initializeIPSCCPPass(R);
+ initializeGlobalOptPass(R);
+ initializeConstantMergePass(R);
+ initializeDAHPass(R);
+ initializeInstCombinerPass(R);
+ initializeSimpleInlinerPass(R);
+ initializePruneEHPass(R);
+ initializeGlobalDCEPass(R);
+ initializeArgPromotionPass(R);
+ initializeJumpThreadingPass(R);
+ initializeSROAPass(R);
+ initializeSROA_DTPass(R);
+ initializeSROA_SSAUpPass(R);
+ initializeFunctionAttrsPass(R);
+ initializeGlobalsModRefPass(R);
+ initializeLICMPass(R);
+ initializeGVNPass(R);
+ initializeMemCpyOptPass(R);
+ initializeDCEPass(R);
+ initializeCFGSimplifyPassPass(R);
+}
+
+bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
+ bool ret = Linker.linkInModule(mod->getLLVVMModule(), &errMsg);
+
+ const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs();
+ for (int i = 0, e = undefs.size(); i != e; ++i)
+ AsmUndefinedRefs[undefs[i]] = 1;
+
+ return !ret;
+}
+
+void LTOCodeGenerator::setTargetOptions(TargetOptions options) {
+ Options.LessPreciseFPMADOption = options.LessPreciseFPMADOption;
+ Options.NoFramePointerElim = options.NoFramePointerElim;
+ Options.AllowFPOpFusion = options.AllowFPOpFusion;
+ Options.UnsafeFPMath = options.UnsafeFPMath;
+ Options.NoInfsFPMath = options.NoInfsFPMath;
+ Options.NoNaNsFPMath = options.NoNaNsFPMath;
+ Options.HonorSignDependentRoundingFPMathOption =
+ options.HonorSignDependentRoundingFPMathOption;
+ Options.UseSoftFloat = options.UseSoftFloat;
+ Options.FloatABIType = options.FloatABIType;
+ Options.NoZerosInBSS = options.NoZerosInBSS;
+ Options.GuaranteedTailCallOpt = options.GuaranteedTailCallOpt;
+ Options.DisableTailCalls = options.DisableTailCalls;
+ Options.StackAlignmentOverride = options.StackAlignmentOverride;
+ Options.TrapFuncName = options.TrapFuncName;
+ Options.PositionIndependentExecutable = options.PositionIndependentExecutable;
+ Options.EnableSegmentedStacks = options.EnableSegmentedStacks;
+ Options.UseInitArray = options.UseInitArray;
+}
+
+void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) {
+ switch (debug) {
+ case LTO_DEBUG_MODEL_NONE:
+ EmitDwarfDebugInfo = false;
+ return;
+
+ case LTO_DEBUG_MODEL_DWARF:
+ EmitDwarfDebugInfo = true;
+ return;
+ }
+ llvm_unreachable("Unknown debug format!");
+}
+
+void LTOCodeGenerator::setCodePICModel(lto_codegen_model model) {
+ switch (model) {
+ case LTO_CODEGEN_PIC_MODEL_STATIC:
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
+ CodeModel = model;
+ return;
+ }
+ llvm_unreachable("Unknown PIC model!");
+}
+
+bool LTOCodeGenerator::writeMergedModules(const char *path,
+ std::string &errMsg) {
+ if (!determineTarget(errMsg))
+ return false;
+
+ // mark which symbols can not be internalized
+ applyScopeRestrictions();
+
+ // create output file
+ std::string ErrInfo;
+ tool_output_file Out(path, ErrInfo, sys::fs::F_Binary);
+ if (!ErrInfo.empty()) {
+ errMsg = "could not open bitcode file for writing: ";
+ errMsg += path;
+ return false;
+ }
+
+ // write bitcode to it
+ WriteBitcodeToFile(Linker.getModule(), Out.os());
+ Out.os().close();
+
+ if (Out.os().has_error()) {
+ errMsg = "could not write bitcode file: ";
+ errMsg += path;
+ Out.os().clear_error();
+ return false;
+ }
+
+ Out.keep();
+ return true;
+}
+
+bool LTOCodeGenerator::compile_to_file(const char** name,
+ bool disableOpt,
+ bool disableInline,
+ bool disableGVNLoadPRE,
+ std::string& errMsg) {
+ // make unique temp .o file to put generated object file
+ SmallString<128> Filename;
+ int FD;
+ error_code EC = sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename);
+ if (EC) {
+ errMsg = EC.message();
+ return false;
+ }
+
+ // generate object file
+ tool_output_file objFile(Filename.c_str(), FD);
+
+ bool genResult = generateObjectFile(objFile.os(), disableOpt, disableInline,
+ disableGVNLoadPRE, errMsg);
+ objFile.os().close();
+ if (objFile.os().has_error()) {
+ objFile.os().clear_error();
+ sys::fs::remove(Twine(Filename));
+ return false;
+ }
+
+ objFile.keep();
+ if (!genResult) {
+ sys::fs::remove(Twine(Filename));
+ return false;
+ }
+
+ NativeObjectPath = Filename.c_str();
+ *name = NativeObjectPath.c_str();
+ return true;
+}
+
+const void* LTOCodeGenerator::compile(size_t* length,
+ bool disableOpt,
+ bool disableInline,
+ bool disableGVNLoadPRE,
+ std::string& errMsg) {
+ const char *name;
+ if (!compile_to_file(&name, disableOpt, disableInline, disableGVNLoadPRE,
+ errMsg))
+ return NULL;
+
+ // remove old buffer if compile() called twice
+ delete NativeObjectFile;
+
+ // read .o file into memory buffer
+ OwningPtr<MemoryBuffer> BuffPtr;
+ if (error_code ec = MemoryBuffer::getFile(name, BuffPtr, -1, false)) {
+ errMsg = ec.message();
+ sys::fs::remove(NativeObjectPath);
+ return NULL;
+ }
+ NativeObjectFile = BuffPtr.take();
+
+ // remove temp files
+ sys::fs::remove(NativeObjectPath);
+
+ // return buffer, unless error
+ if (NativeObjectFile == NULL)
+ return NULL;
+ *length = NativeObjectFile->getBufferSize();
+ return NativeObjectFile->getBufferStart();
+}
+
+bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
+ if (TargetMach != NULL)
+ return true;
+
+ std::string TripleStr = Linker.getModule()->getTargetTriple();
+ if (TripleStr.empty())
+ TripleStr = sys::getDefaultTargetTriple();
+ llvm::Triple Triple(TripleStr);
+
+ // create target machine from info for merged modules
+ const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
+ if (march == NULL)
+ return false;
+
+ // The relocation model is actually a static member of TargetMachine and
+ // needs to be set before the TargetMachine is instantiated.
+ Reloc::Model RelocModel = Reloc::Default;
+ switch (CodeModel) {
+ case LTO_CODEGEN_PIC_MODEL_STATIC:
+ RelocModel = Reloc::Static;
+ break;
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
+ RelocModel = Reloc::PIC_;
+ break;
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
+ RelocModel = Reloc::DynamicNoPIC;
+ break;
+ }
+
+  // Compute subtarget features and create the target machine for the merged module.
+ SubtargetFeatures Features;
+ Features.getDefaultSubtargetFeatures(Triple);
+ std::string FeatureStr = Features.getString();
+ // Set a default CPU for Darwin triples.
+ if (MCpu.empty() && Triple.isOSDarwin()) {
+ if (Triple.getArch() == llvm::Triple::x86_64)
+ MCpu = "core2";
+ else if (Triple.getArch() == llvm::Triple::x86)
+ MCpu = "yonah";
+ }
+
+ TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options,
+ RelocModel, CodeModel::Default,
+ CodeGenOpt::Aggressive);
+ return true;
+}
+
+void LTOCodeGenerator::
+applyRestriction(GlobalValue &GV,
+ const ArrayRef<StringRef> &Libcalls,
+ std::vector<const char*> &MustPreserveList,
+ SmallPtrSet<GlobalValue*, 8> &AsmUsed,
+ Mangler &Mangler) {
+ SmallString<64> Buffer;
+ Mangler.getNameWithPrefix(Buffer, &GV, false);
+
+ if (GV.isDeclaration())
+ return;
+ if (MustPreserveSymbols.count(Buffer))
+ MustPreserveList.push_back(GV.getName().data());
+ if (AsmUndefinedRefs.count(Buffer))
+ AsmUsed.insert(&GV);
+
+ // Conservatively append user-supplied runtime library functions to
+ // llvm.compiler.used. These could be internalized and deleted by
+ // optimizations like -globalopt, causing problems when later optimizations
+ // add new library calls (e.g., llvm.memset => memset and printf => puts).
+ // Leave it to the linker to remove any dead code (e.g. with -dead_strip).
+ if (isa<Function>(GV) &&
+ std::binary_search(Libcalls.begin(), Libcalls.end(), GV.getName()))
+ AsmUsed.insert(&GV);
+}
+
+static void findUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
+static void accumulateAndSortLibcalls(std::vector<StringRef> &Libcalls,
+ const TargetLibraryInfo& TLI,
+ const TargetLowering *Lowering)
+{
+ // TargetLibraryInfo has info on C runtime library calls on the current
+ // target.
+ for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs);
+ I != E; ++I) {
+ LibFunc::Func F = static_cast<LibFunc::Func>(I);
+ if (TLI.has(F))
+ Libcalls.push_back(TLI.getName(F));
+ }
+
+ // TargetLowering has info on library calls that CodeGen expects to be
+ // available, both from the C runtime and compiler-rt.
+ if (Lowering)
+ for (unsigned I = 0, E = static_cast<unsigned>(RTLIB::UNKNOWN_LIBCALL);
+ I != E; ++I)
+ if (const char *Name
+ = Lowering->getLibcallName(static_cast<RTLIB::Libcall>(I)))
+ Libcalls.push_back(Name);
+
+ array_pod_sort(Libcalls.begin(), Libcalls.end());
+ Libcalls.erase(std::unique(Libcalls.begin(), Libcalls.end()),
+ Libcalls.end());
+}
+
+void LTOCodeGenerator::applyScopeRestrictions() {
+ if (ScopeRestrictionsDone)
+ return;
+ Module *mergedModule = Linker.getModule();
+
+ // Start off with a verification pass.
+ PassManager passes;
+ passes.add(createVerifierPass());
+
+ // mark which symbols can not be internalized
+ Mangler Mangler(TargetMach);
+ std::vector<const char*> MustPreserveList;
+ SmallPtrSet<GlobalValue*, 8> AsmUsed;
+ std::vector<StringRef> Libcalls;
+ TargetLibraryInfo TLI(Triple(TargetMach->getTargetTriple()));
+ accumulateAndSortLibcalls(Libcalls, TLI, TargetMach->getTargetLowering());
+
+ for (Module::iterator f = mergedModule->begin(),
+ e = mergedModule->end(); f != e; ++f)
+ applyRestriction(*f, Libcalls, MustPreserveList, AsmUsed, Mangler);
+ for (Module::global_iterator v = mergedModule->global_begin(),
+ e = mergedModule->global_end(); v != e; ++v)
+ applyRestriction(*v, Libcalls, MustPreserveList, AsmUsed, Mangler);
+ for (Module::alias_iterator a = mergedModule->alias_begin(),
+ e = mergedModule->alias_end(); a != e; ++a)
+ applyRestriction(*a, Libcalls, MustPreserveList, AsmUsed, Mangler);
+
+ GlobalVariable *LLVMCompilerUsed =
+ mergedModule->getGlobalVariable("llvm.compiler.used");
+ findUsedValues(LLVMCompilerUsed, AsmUsed);
+ if (LLVMCompilerUsed)
+ LLVMCompilerUsed->eraseFromParent();
+
+ if (!AsmUsed.empty()) {
+ llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(Context);
+ std::vector<Constant*> asmUsed2;
+ for (SmallPtrSet<GlobalValue*, 16>::const_iterator i = AsmUsed.begin(),
+ e = AsmUsed.end(); i !=e; ++i) {
+ GlobalValue *GV = *i;
+ Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
+ asmUsed2.push_back(c);
+ }
+
+ llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
+ LLVMCompilerUsed =
+ new llvm::GlobalVariable(*mergedModule, ATy, false,
+ llvm::GlobalValue::AppendingLinkage,
+ llvm::ConstantArray::get(ATy, asmUsed2),
+ "llvm.compiler.used");
+
+ LLVMCompilerUsed->setSection("llvm.metadata");
+ }
+
+ passes.add(createInternalizePass(MustPreserveList));
+
+ // apply scope restrictions
+ passes.run(*mergedModule);
+
+ ScopeRestrictionsDone = true;
+}
+
+/// Optimize the merged module with IPO passes and generate the native object file.
+bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
+ bool DisableOpt,
+ bool DisableInline,
+ bool DisableGVNLoadPRE,
+ std::string &errMsg) {
+ if (!this->determineTarget(errMsg))
+ return false;
+
+ Module *mergedModule = Linker.getModule();
+
+ // Mark which symbols can not be internalized
+ this->applyScopeRestrictions();
+
+ // Instantiate the pass manager to organize the passes.
+ PassManager passes;
+
+ // Start off with a verification pass.
+ passes.add(createVerifierPass());
+
+ // Add an appropriate DataLayout instance for this module...
+ passes.add(new DataLayout(*TargetMach->getDataLayout()));
+ TargetMach->addAnalysisPasses(passes);
+
+ // Enabling internalize here would use its AllButMain variant. It
+ // keeps only main if it exists and does nothing for libraries. Instead
+ // we create the pass ourselves with the symbol list provided by the linker.
+ if (!DisableOpt)
+ PassManagerBuilder().populateLTOPassManager(passes,
+ /*Internalize=*/false,
+ !DisableInline,
+ DisableGVNLoadPRE);
+
+ // Make sure everything is still good.
+ passes.add(createVerifierPass());
+
+ PassManager codeGenPasses;
+
+ codeGenPasses.add(new DataLayout(*TargetMach->getDataLayout()));
+ TargetMach->addAnalysisPasses(codeGenPasses);
+
+ formatted_raw_ostream Out(out);
+
+ // If the bitcode files contain ARC code and were compiled with optimization,
+ // the ObjCARCContractPass must be run, so do it unconditionally here.
+ codeGenPasses.add(createObjCARCContractPass());
+
+ if (TargetMach->addPassesToEmitFile(codeGenPasses, Out,
+ TargetMachine::CGFT_ObjectFile)) {
+ errMsg = "target file type not supported";
+ return false;
+ }
+
+ // Run our queue of passes all at once now, efficiently.
+ passes.run(*mergedModule);
+
+ // Run the code generator, and write assembly file
+ codeGenPasses.run(*mergedModule);
+
+ return true;
+}
+
+/// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging
+/// LTO problems.
+void LTOCodeGenerator::setCodeGenDebugOptions(const char *options) {
+ for (std::pair<StringRef, StringRef> o = getToken(options);
+ !o.first.empty(); o = getToken(o.second)) {
+ // ParseCommandLineOptions() expects argv[0] to be program name. Lazily add
+ // that.
+ if (CodegenOptions.empty())
+ CodegenOptions.push_back(strdup("libLLVMLTO"));
+ CodegenOptions.push_back(strdup(o.first.str().c_str()));
+ }
+}
+
+void LTOCodeGenerator::parseCodeGenDebugOptions() {
+ // if options were requested, set them
+ if (!CodegenOptions.empty())
+ cl::ParseCommandLineOptions(CodegenOptions.size(),
+ const_cast<char **>(&CodegenOptions[0]));
+}
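
An end-to-end sketch (not part of this patch) of how a linker might drive this class together with LTOModule; the input names and error handling are illustrative, and the LTOModule objects stay owned by the caller:

#include "llvm/LTO/LTOCodeGenerator.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdio>
#include <string>
using namespace llvm;

int main() {
  InitializeAllTargetInfos();
  InitializeAllTargets();
  InitializeAllTargetMCs();
  InitializeAllAsmPrinters();
  InitializeAllAsmParsers();

  std::string Err;
  TargetOptions Options;
  // "a.o" and "b.o" are assumed to be LLVM bitcode files.
  LTOModule *A = LTOModule::makeLTOModule("a.o", Options, Err);
  LTOModule *B = LTOModule::makeLTOModule("b.o", Options, Err);
  if (!A || !B) { std::fprintf(stderr, "%s\n", Err.c_str()); return 1; }

  LTOCodeGenerator Gen;
  Gen.setTargetOptions(Options);
  if (!Gen.addModule(A, Err) || !Gen.addModule(B, Err)) {
    std::fprintf(stderr, "%s\n", Err.c_str());
    return 1;
  }

  const char *ObjPath = 0;
  if (!Gen.compile_to_file(&ObjPath, /*disableOpt=*/false, /*disableInline=*/false,
                           /*disableGVNLoadPRE=*/false, Err)) {
    std::fprintf(stderr, "%s\n", Err.c_str());
    return 1;
  }
  std::printf("native object written to %s\n", ObjPath);
  delete A;
  delete B;
  return 0;
}
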
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
new file mode 100644
index 0000000..65416be
--- /dev/null
+++ b/lib/LTO/LTOModule.cpp
@@ -0,0 +1,794 @@
+//===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Link Time Optimization library. This library is
+// intended to be used by a linker to optimize code at link time.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/LTOModule.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+using namespace llvm;
+
+LTOModule::LTOModule(llvm::Module *m, llvm::TargetMachine *t)
+ : _module(m), _target(t),
+ _context(_target->getMCAsmInfo(), _target->getRegisterInfo(), NULL),
+ _mangler(t) {}
+
+/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
+/// bitcode.
+bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
+ return sys::fs::identify_magic(StringRef((const char *)mem, length)) ==
+ sys::fs::file_magic::bitcode;
+}
+
+bool LTOModule::isBitcodeFile(const char *path) {
+ sys::fs::file_magic type;
+ if (sys::fs::identify_magic(path, type))
+ return false;
+ return type == sys::fs::file_magic::bitcode;
+}
+
+/// isBitcodeFileForTarget - Returns 'true' if the file (or memory contents) is
+/// LLVM bitcode for the specified triple.
+bool LTOModule::isBitcodeFileForTarget(const void *mem, size_t length,
+ const char *triplePrefix) {
+ MemoryBuffer *buffer = makeBuffer(mem, length);
+ if (!buffer)
+ return false;
+ return isTargetMatch(buffer, triplePrefix);
+}
+
+bool LTOModule::isBitcodeFileForTarget(const char *path,
+ const char *triplePrefix) {
+ OwningPtr<MemoryBuffer> buffer;
+ if (MemoryBuffer::getFile(path, buffer))
+ return false;
+ return isTargetMatch(buffer.take(), triplePrefix);
+}
+
+/// isTargetMatch - Returns 'true' if the memory buffer is for the specified
+/// target triple.
+bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) {
+ std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext());
+ delete buffer;
+ return strncmp(Triple.c_str(), triplePrefix, strlen(triplePrefix)) == 0;
+}
+
+/// makeLTOModule - Create an LTOModule. N.B. These methods take ownership of
+/// the buffer.
+LTOModule *LTOModule::makeLTOModule(const char *path, TargetOptions options,
+ std::string &errMsg) {
+ OwningPtr<MemoryBuffer> buffer;
+ if (error_code ec = MemoryBuffer::getFile(path, buffer)) {
+ errMsg = ec.message();
+ return NULL;
+ }
+ return makeLTOModule(buffer.take(), options, errMsg);
+}
+
+LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
+ size_t size, TargetOptions options,
+ std::string &errMsg) {
+ return makeLTOModule(fd, path, size, 0, options, errMsg);
+}
+
+LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
+ size_t map_size,
+ off_t offset,
+ TargetOptions options,
+ std::string &errMsg) {
+ OwningPtr<MemoryBuffer> buffer;
+ if (error_code ec =
+ MemoryBuffer::getOpenFileSlice(fd, path, buffer, map_size, offset)) {
+ errMsg = ec.message();
+ return NULL;
+ }
+ return makeLTOModule(buffer.take(), options, errMsg);
+}
+
+LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length,
+ TargetOptions options,
+ std::string &errMsg) {
+ OwningPtr<MemoryBuffer> buffer(makeBuffer(mem, length));
+ if (!buffer)
+ return NULL;
+ return makeLTOModule(buffer.take(), options, errMsg);
+}
+
+LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
+ TargetOptions options,
+ std::string &errMsg) {
+ // parse bitcode buffer
+ OwningPtr<Module> m(getLazyBitcodeModule(buffer, getGlobalContext(),
+ &errMsg));
+ if (!m) {
+ delete buffer;
+ return NULL;
+ }
+
+ std::string TripleStr = m->getTargetTriple();
+ if (TripleStr.empty())
+ TripleStr = sys::getDefaultTargetTriple();
+ llvm::Triple Triple(TripleStr);
+
+ // find machine architecture for this module
+ const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
+ if (!march)
+ return NULL;
+
+ // construct LTOModule, hand over ownership of module and target
+ SubtargetFeatures Features;
+ Features.getDefaultSubtargetFeatures(Triple);
+ std::string FeatureStr = Features.getString();
+ // Set a default CPU for Darwin triples.
+ std::string CPU;
+ if (Triple.isOSDarwin()) {
+ if (Triple.getArch() == llvm::Triple::x86_64)
+ CPU = "core2";
+ else if (Triple.getArch() == llvm::Triple::x86)
+ CPU = "yonah";
+ }
+
+ TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
+ options);
+ m->MaterializeAllPermanently();
+
+ LTOModule *Ret = new LTOModule(m.take(), target);
+ if (Ret->parseSymbols(errMsg)) {
+ delete Ret;
+ return NULL;
+ }
+
+ return Ret;
+}
+
+/// makeBuffer - Create a MemoryBuffer from a memory range.
+MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) {
+ const char *startPtr = (const char*)mem;
+ return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false);
+}
+
+/// objcClassNameFromExpression - Get string that the data pointer points to.
+bool
+LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
+ if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
+ Constant *op = ce->getOperand(0);
+ if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
+ Constant *cn = gvn->getInitializer();
+ if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
+ if (ca->isCString()) {
+ name = ".objc_class_name_" + ca->getAsCString().str();
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// addObjCClass - Parse i386/ppc ObjC class data structure.
+void LTOModule::addObjCClass(const GlobalVariable *clgv) {
+ const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
+ if (!c) return;
+
+ // second slot in __OBJC,__class is pointer to superclass name
+ std::string superclassName;
+ if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
+ NameAndAttributes info;
+ StringMap<NameAndAttributes>::value_type &entry =
+ _undefines.GetOrCreateValue(superclassName);
+ if (!entry.getValue().name) {
+ const char *symbolName = entry.getKey().data();
+ info.name = symbolName;
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ info.isFunction = false;
+ info.symbol = clgv;
+ entry.setValue(info);
+ }
+ }
+
+ // third slot in __OBJC,__class is pointer to class name
+ std::string className;
+ if (objcClassNameFromExpression(c->getOperand(2), className)) {
+ StringSet::value_type &entry = _defines.GetOrCreateValue(className);
+ entry.setValue(1);
+
+ NameAndAttributes info;
+ info.name = entry.getKey().data();
+ info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
+ LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
+ info.isFunction = false;
+ info.symbol = clgv;
+ _symbols.push_back(info);
+ }
+}
+
+/// addObjCCategory - Parse i386/ppc ObjC category data structure.
+void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
+ const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
+ if (!c) return;
+
+ // second slot in __OBJC,__category is pointer to target class name
+ std::string targetclassName;
+ if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
+ return;
+
+ NameAndAttributes info;
+ StringMap<NameAndAttributes>::value_type &entry =
+ _undefines.GetOrCreateValue(targetclassName);
+
+ if (entry.getValue().name)
+ return;
+
+ const char *symbolName = entry.getKey().data();
+ info.name = symbolName;
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ info.isFunction = false;
+ info.symbol = clgv;
+ entry.setValue(info);
+}
+
+/// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
+void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
+ std::string targetclassName;
+ if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
+ return;
+
+ NameAndAttributes info;
+ StringMap<NameAndAttributes>::value_type &entry =
+ _undefines.GetOrCreateValue(targetclassName);
+ if (entry.getValue().name)
+ return;
+
+ const char *symbolName = entry.getKey().data();
+ info.name = symbolName;
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ info.isFunction = false;
+ info.symbol = clgv;
+ entry.setValue(info);
+}
+
+/// addDefinedDataSymbol - Add a data symbol as defined to the list.
+void LTOModule::addDefinedDataSymbol(const GlobalValue *v) {
+ // Add to list of defined symbols.
+ addDefinedSymbol(v, false);
+
+ if (!v->hasSection() /* || !isTargetDarwin */)
+ return;
+
+ // Special case i386/ppc ObjC data structures in magic sections:
+ // The issue is that the old ObjC object format did some strange
+ // contortions to avoid real linker symbols. For instance, the
+ // ObjC class data structure is allocated statically in the executable
+ // that defines that class. That data structures contains a pointer to
+ // its superclass. But instead of just initializing that part of the
+ // struct to the address of its superclass, and letting the static and
+ // dynamic linkers do the rest, the runtime works by having that field
+ // instead point to a C-string that is the name of the superclass.
+ // At runtime the objc initialization updates that pointer and sets
+ // it to point to the actual super class. As far as the linker
+ // knows it is just a pointer to a string. But then someone wanted the
+ // linker to issue errors at build time if the superclass was not found.
+  // So they figured out a way in the mach-o object format to use an absolute
+  // symbol (.objc_class_name_Foo = 0) and a floating reference
+  // (.reference .objc_class_name_Bar) to make the linker error out when
+  // a class was missing.
+ // The following synthesizes the implicit .objc_* symbols for the linker
+ // from the ObjC data structures generated by the front end.
+
+ // special case if this data blob is an ObjC class definition
+ if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) {
+ if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ addObjCClass(gv);
+ }
+ }
+
+ // special case if this data blob is an ObjC category definition
+ else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) {
+ if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ addObjCCategory(gv);
+ }
+ }
+
+ // special case if this data blob is the list of referenced classes
+ else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) {
+ if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ addObjCClassRef(gv);
+ }
+ }
+}
+
+/// addDefinedFunctionSymbol - Add a function symbol as defined to the list.
+void LTOModule::addDefinedFunctionSymbol(const Function *f) {
+ // add to list of defined symbols
+ addDefinedSymbol(f, true);
+}
+
+static bool canBeHidden(const GlobalValue *GV) {
+ GlobalValue::LinkageTypes L = GV->getLinkage();
+
+ if (L != GlobalValue::LinkOnceODRLinkage)
+ return false;
+
+ if (GV->hasUnnamedAddr())
+ return true;
+
+ GlobalStatus GS;
+ if (GlobalStatus::analyzeGlobal(GV, GS))
+ return false;
+
+ return !GS.IsCompared;
+}
+
+/// addDefinedSymbol - Add a defined symbol to the list.
+void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) {
+ // ignore all llvm.* symbols
+ if (def->getName().startswith("llvm."))
+ return;
+
+ // string is owned by _defines
+ SmallString<64> Buffer;
+ _mangler.getNameWithPrefix(Buffer, def, false);
+
+  // Set the alignment part; use countTrailingZeros() since log2() can have rounding errors.
+ uint32_t align = def->getAlignment();
+ uint32_t attr = align ? countTrailingZeros(def->getAlignment()) : 0;
+
+ // set permissions part
+ if (isFunction) {
+ attr |= LTO_SYMBOL_PERMISSIONS_CODE;
+ } else {
+ const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
+ if (gv && gv->isConstant())
+ attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
+ else
+ attr |= LTO_SYMBOL_PERMISSIONS_DATA;
+ }
+
+ // set definition part
+ if (def->hasWeakLinkage() || def->hasLinkOnceLinkage() ||
+ def->hasLinkerPrivateWeakLinkage())
+ attr |= LTO_SYMBOL_DEFINITION_WEAK;
+ else if (def->hasCommonLinkage())
+ attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
+ else
+ attr |= LTO_SYMBOL_DEFINITION_REGULAR;
+
+ // set scope part
+ if (def->hasHiddenVisibility())
+ attr |= LTO_SYMBOL_SCOPE_HIDDEN;
+ else if (def->hasProtectedVisibility())
+ attr |= LTO_SYMBOL_SCOPE_PROTECTED;
+ else if (canBeHidden(def))
+ attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
+ else if (def->hasExternalLinkage() || def->hasWeakLinkage() ||
+ def->hasLinkOnceLinkage() || def->hasCommonLinkage() ||
+ def->hasLinkerPrivateWeakLinkage())
+ attr |= LTO_SYMBOL_SCOPE_DEFAULT;
+ else
+ attr |= LTO_SYMBOL_SCOPE_INTERNAL;
+
+ StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer);
+ entry.setValue(1);
+
+ // fill information structure
+ NameAndAttributes info;
+ StringRef Name = entry.getKey();
+ info.name = Name.data();
+ assert(info.name[Name.size()] == '\0');
+ info.attributes = attr;
+ info.isFunction = isFunction;
+ info.symbol = def;
+
+ // add to table of symbols
+ _symbols.push_back(info);
+}
+
+/// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
+/// defined list.
+void LTOModule::addAsmGlobalSymbol(const char *name,
+ lto_symbol_attributes scope) {
+ StringSet::value_type &entry = _defines.GetOrCreateValue(name);
+
+ // only add new define if not already defined
+ if (entry.getValue())
+ return;
+
+ entry.setValue(1);
+
+ NameAndAttributes &info = _undefines[entry.getKey().data()];
+
+ if (info.symbol == 0) {
+ // FIXME: This is trying to take care of module ASM like this:
+ //
+ // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
+ //
+ // but is gross and its mother dresses it funny. Have the ASM parser give us
+ // more details for this type of situation so that we're not guessing so
+ // much.
+
+ // fill information structure
+ info.name = entry.getKey().data();
+ info.attributes =
+ LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
+ info.isFunction = false;
+ info.symbol = 0;
+
+ // add to table of symbols
+ _symbols.push_back(info);
+ return;
+ }
+
+ if (info.isFunction)
+ addDefinedFunctionSymbol(cast<Function>(info.symbol));
+ else
+ addDefinedDataSymbol(info.symbol);
+
+ _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
+ _symbols.back().attributes |= scope;
+}
+
+/// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
+/// undefined list.
+void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
+ StringMap<NameAndAttributes>::value_type &entry =
+ _undefines.GetOrCreateValue(name);
+
+ _asm_undefines.push_back(entry.getKey().data());
+
+ // we already have the symbol
+ if (entry.getValue().name)
+ return;
+
+  uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ attr |= LTO_SYMBOL_SCOPE_DEFAULT;
+ NameAndAttributes info;
+ info.name = entry.getKey().data();
+ info.attributes = attr;
+ info.isFunction = false;
+ info.symbol = 0;
+
+ entry.setValue(info);
+}
+
+/// addPotentialUndefinedSymbol - Add a symbol which isn't defined just yet to a
+/// list to be resolved later.
+void
+LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) {
+ // ignore all llvm.* symbols
+ if (decl->getName().startswith("llvm."))
+ return;
+
+ // ignore all aliases
+ if (isa<GlobalAlias>(decl))
+ return;
+
+ SmallString<64> name;
+ _mangler.getNameWithPrefix(name, decl, false);
+
+ StringMap<NameAndAttributes>::value_type &entry =
+ _undefines.GetOrCreateValue(name);
+
+ // we already have the symbol
+ if (entry.getValue().name)
+ return;
+
+ NameAndAttributes info;
+
+ info.name = entry.getKey().data();
+
+ if (decl->hasExternalWeakLinkage())
+ info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
+ else
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+
+ info.isFunction = isFunc;
+ info.symbol = decl;
+
+ entry.setValue(info);
+}
+
+namespace {
+ class RecordStreamer : public MCStreamer {
+ public:
+ enum State { NeverSeen, Global, Defined, DefinedGlobal, Used };
+
+ private:
+ StringMap<State> Symbols;
+
+ void markDefined(const MCSymbol &Symbol) {
+ State &S = Symbols[Symbol.getName()];
+ switch (S) {
+ case DefinedGlobal:
+ case Global:
+ S = DefinedGlobal;
+ break;
+ case NeverSeen:
+ case Defined:
+ case Used:
+ S = Defined;
+ break;
+ }
+ }
+ void markGlobal(const MCSymbol &Symbol) {
+ State &S = Symbols[Symbol.getName()];
+ switch (S) {
+ case DefinedGlobal:
+ case Defined:
+ S = DefinedGlobal;
+ break;
+
+ case NeverSeen:
+ case Global:
+ case Used:
+ S = Global;
+ break;
+ }
+ }
+ void markUsed(const MCSymbol &Symbol) {
+ State &S = Symbols[Symbol.getName()];
+ switch (S) {
+ case DefinedGlobal:
+ case Defined:
+ case Global:
+ break;
+
+ case NeverSeen:
+ case Used:
+ S = Used;
+ break;
+ }
+ }
+
+    // FIXME: mostly copied from the object streamer.
+ void AddValueSymbols(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ // FIXME: What should we do in here?
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols(BE->getLHS());
+ AddValueSymbols(BE->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ markUsed(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+ break;
+ }
+ }
+
+ public:
+ typedef StringMap<State>::const_iterator const_iterator;
+
+ const_iterator begin() {
+ return Symbols.begin();
+ }
+
+ const_iterator end() {
+ return Symbols.end();
+ }
+
+ RecordStreamer(MCContext &Context) : MCStreamer(Context, 0) {}
+
+ virtual void EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--; )
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+ }
+ virtual void EmitLabel(MCSymbol *Symbol) {
+ Symbol->setSection(*getCurrentSection().first);
+ markDefined(*Symbol);
+ }
+ virtual void EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+ }
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // FIXME: should we handle aliases?
+ markDefined(*Symbol);
+ }
+ virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ if (Attribute == MCSA_Global)
+ markGlobal(*Symbol);
+ return true;
+ }
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size , unsigned ByteAlignment) {
+ markDefined(*Symbol);
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ markDefined(*Symbol);
+ }
+
+ virtual void EmitBundleAlignMode(unsigned AlignPow2) {}
+ virtual void EmitBundleLock(bool AlignToEnd) {}
+ virtual void EmitBundleUnlock() {}
+
+ // Noop calls.
+ virtual void ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {}
+ virtual void InitToTextSection() {}
+ virtual void InitSections() {}
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+ virtual void EmitThumbFunc(MCSymbol *Func) {}
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
+ virtual void EmitCOFFSymbolType(int Type) {}
+ virtual void EndCOFFSymbolDef() {}
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+ virtual void EmitBytes(StringRef Data) {}
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) {}
+ virtual void EmitULEB128Value(const MCExpr *Value) {}
+ virtual void EmitSLEB128Value(const MCExpr *Value) {}
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {}
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {}
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value ) { return false; }
+ virtual void EmitFileDirective(StringRef Filename) {}
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {}
+ virtual void FinishImpl() {}
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcEnd(Frame);
+ }
+ };
+} // end anonymous namespace
+
+/// addAsmGlobalSymbols - Add global symbols from module-level ASM to the
+/// defined or undefined lists.
+bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) {
+ const std::string &inlineAsm = _module->getModuleInlineAsm();
+ if (inlineAsm.empty())
+ return false;
+
+ OwningPtr<RecordStreamer> Streamer(new RecordStreamer(_context));
+ MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(inlineAsm);
+ SourceMgr SrcMgr;
+ SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
+ _context, *Streamer,
+ *_target->getMCAsmInfo()));
+ const Target &T = _target->getTarget();
+ OwningPtr<MCInstrInfo> MCII(T.createMCInstrInfo());
+ OwningPtr<MCSubtargetInfo>
+ STI(T.createMCSubtargetInfo(_target->getTargetTriple(),
+ _target->getTargetCPU(),
+ _target->getTargetFeatureString()));
+ OwningPtr<MCTargetAsmParser> TAP(T.createMCAsmParser(*STI, *Parser.get(), *MCII));
+ if (!TAP) {
+ errMsg = "target " + std::string(T.getName()) +
+ " does not define AsmParser.";
+ return true;
+ }
+
+ Parser->setTargetParser(*TAP);
+ if (Parser->Run(false))
+ return true;
+
+ for (RecordStreamer::const_iterator i = Streamer->begin(),
+ e = Streamer->end(); i != e; ++i) {
+ StringRef Key = i->first();
+ RecordStreamer::State Value = i->second;
+ if (Value == RecordStreamer::DefinedGlobal)
+ addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_DEFAULT);
+ else if (Value == RecordStreamer::Defined)
+ addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_INTERNAL);
+ else if (Value == RecordStreamer::Global ||
+ Value == RecordStreamer::Used)
+ addAsmGlobalSymbolUndef(Key.data());
+ }
+
+ return false;
+}
+
+/// isDeclaration - Return 'true' if the global value is a declaration.
+static bool isDeclaration(const GlobalValue &V) {
+ if (V.hasAvailableExternallyLinkage())
+ return true;
+
+ if (V.isMaterializable())
+ return false;
+
+ return V.isDeclaration();
+}
+
+/// parseSymbols - Parse the symbols from the module and module-level ASM and add
+/// them to either the defined or undefined lists.
+bool LTOModule::parseSymbols(std::string &errMsg) {
+ // add functions
+ for (Module::iterator f = _module->begin(), e = _module->end(); f != e; ++f) {
+ if (isDeclaration(*f))
+ addPotentialUndefinedSymbol(f, true);
+ else
+ addDefinedFunctionSymbol(f);
+ }
+
+ // add data
+ for (Module::global_iterator v = _module->global_begin(),
+ e = _module->global_end(); v != e; ++v) {
+ if (isDeclaration(*v))
+ addPotentialUndefinedSymbol(v, false);
+ else
+ addDefinedDataSymbol(v);
+ }
+
+ // add asm globals
+ if (addAsmGlobalSymbols(errMsg))
+ return true;
+
+ // add aliases
+ for (Module::alias_iterator a = _module->alias_begin(),
+ e = _module->alias_end(); a != e; ++a) {
+ if (isDeclaration(*a->getAliasedGlobal()))
+ // Is an alias to a declaration.
+ addPotentialUndefinedSymbol(a, false);
+ else
+ addDefinedDataSymbol(a);
+ }
+
+ // make symbols for all undefines
+ for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
+ e = _undefines.end(); u != e; ++u) {
+ // If this symbol also has a definition, then don't make an undefine because
+ // it is a tentative definition.
+ if (_defines.count(u->getKey())) continue;
+ NameAndAttributes info = u->getValue();
+ _symbols.push_back(info);
+ }
+
+ return false;
+}
diff --git a/lib/LTO/Makefile b/lib/LTO/Makefile
new file mode 100644
index 0000000..55e2a5e
--- /dev/null
+++ b/lib/LTO/Makefile
@@ -0,0 +1,15 @@
+##===- lib/LTO/Makefile ------------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMLTO
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index ce02c37..8f2200e 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -22,8 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
-
-#include <ctype.h>
+#include <cctype>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -706,7 +705,11 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
if (DstGV->getVisibility() != SrcGV->getVisibility())
return emitError(
"Appending variables with different visibility need to be linked!");
-
+
+ if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr())
+ return emitError(
+ "Appending variables with different unnamed_addr need to be linked!");
+
if (DstGV->getSection() != SrcGV->getSection())
return emitError(
"Appending variables with different section name need to be linked!");
@@ -748,6 +751,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
GlobalValue *DGV = getLinkedToGlobal(SGV);
llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+ bool HasUnnamedAddr = SGV->hasUnnamedAddr();
if (DGV) {
// Concatenation of appending linkage variables is magic and handled later.
@@ -762,6 +766,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc))
return true;
NewVisibility = NV;
+ HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
// If we're not linking from the source, then keep the definition that we
// have.
@@ -770,10 +775,11 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
DGVar->setConstant(true);
-
- // Set calculated linkage and visibility.
+
+ // Set calculated linkage, visibility and unnamed_addr.
DGV->setLinkage(NewLinkage);
DGV->setVisibility(*NewVisibility);
+ DGV->setUnnamedAddr(HasUnnamedAddr);
// Make sure to remember this mapping.
ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
@@ -799,6 +805,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
copyGVAttributes(NewDGV, SGV);
if (NewVisibility)
NewDGV->setVisibility(*NewVisibility);
+ NewDGV->setUnnamedAddr(HasUnnamedAddr);
if (DGV) {
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
@@ -815,6 +822,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
bool ModuleLinker::linkFunctionProto(Function *SF) {
GlobalValue *DGV = getLinkedToGlobal(SF);
llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+ bool HasUnnamedAddr = SF->hasUnnamedAddr();
if (DGV) {
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
@@ -823,11 +831,13 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc))
return true;
NewVisibility = NV;
+ HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
if (!LinkFromSrc) {
// Set calculated linkage
DGV->setLinkage(NewLinkage);
DGV->setVisibility(*NewVisibility);
+ DGV->setUnnamedAddr(HasUnnamedAddr);
// Make sure to remember this mapping.
ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
@@ -855,6 +865,7 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
copyGVAttributes(NewDF, SF);
if (NewVisibility)
NewDF->setVisibility(*NewVisibility);
+ NewDF->setUnnamedAddr(HasUnnamedAddr);
if (DGV) {
// Any uses of DF need to change to NewDF, with cast.
@@ -1250,6 +1261,13 @@ bool ModuleLinker::run() {
// Skip if not linking from source.
if (DoNotLinkFromSource.count(SF)) continue;
+ Function *DF = cast<Function>(ValueMap[SF]);
+ if (SF->hasPrefixData()) {
+ // Link in the prefix data.
+ DF->setPrefixData(MapValue(
+ SF->getPrefixData(), ValueMap, RF_None, &TypeMap, &ValMaterializer));
+ }
+
// Skip if no body (function is external) or materialize.
if (SF->isDeclaration()) {
if (!SF->isMaterializable())
@@ -1258,7 +1276,7 @@ bool ModuleLinker::run() {
return true;
}
- linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+ linkFunctionBody(DF, SF);
SF->Dematerialize();
}
@@ -1286,6 +1304,14 @@ bool ModuleLinker::run() {
continue;
Function *DF = cast<Function>(ValueMap[SF]);
+ if (SF->hasPrefixData()) {
+ // Link in the prefix data.
+ DF->setPrefixData(MapValue(SF->getPrefixData(),
+ ValueMap,
+ RF_None,
+ &TypeMap,
+ &ValMaterializer));
+ }
// Materialize if necessary.
if (SF->isDeclaration()) {
@@ -1326,6 +1352,11 @@ Linker::Linker(Module *M) : Composite(M) {
Linker::~Linker() {
}
+void Linker::deleteModule() {
+ delete Composite;
+ Composite = NULL;
+}
+
bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) {
ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, Mode);
if (TheLinker.run()) {
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 89e2aaf..fa844ef 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMMC
MCAsmInfo.cpp
MCAsmInfoCOFF.cpp
MCAsmInfoDarwin.cpp
+ MCAsmInfoELF.cpp
MCAsmStreamer.cpp
MCAssembler.cpp
MCAtom.cpp
@@ -25,6 +26,7 @@ add_llvm_library(LLVMMC
MCMachOStreamer.cpp
MCMachObjectTargetWriter.cpp
MCModule.cpp
+ MCModuleYAML.cpp
MCNullStreamer.cpp
MCObjectFileInfo.cpp
MCObjectDisassembler.cpp
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 2db59ac..9899bb2 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -73,10 +73,6 @@ class ELFObjectWriter : public MCObjectWriter {
// Support lexicographic sorting.
bool operator<(const ELFSymbolData &RHS) const {
- if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
- return true;
- if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
- return false;
return SymbolData->getSymbol().getName() <
RHS.SymbolData->getSymbol().getName();
}
@@ -98,6 +94,7 @@ class ELFObjectWriter : public MCObjectWriter {
/// @{
SmallString<256> StringTable;
+ std::vector<uint64_t> FileSymbolData;
std::vector<ELFSymbolData> LocalSymbolData;
std::vector<ELFSymbolData> ExternalSymbolData;
std::vector<ELFSymbolData> UndefinedSymbolData;
@@ -551,7 +548,7 @@ void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
uint8_t Type = MCELF::GetType(Data);
uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
- // Other and Visibility share the same byte with Visability using the lower
+ // Other and Visibility share the same byte with Visibility using the lower
// 2 bits
uint8_t Visibility = MCELF::GetVisibility(OrigData);
uint8_t Other = MCELF::getOther(OrigData) <<
@@ -590,8 +587,15 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
// The first entry is the undefined symbol entry.
WriteSymbolEntry(SymtabF, ShndxF, 0, 0, 0, 0, 0, 0, false);
+ for (unsigned i = 0, e = FileSymbolData.size(); i != e; ++i) {
+ WriteSymbolEntry(SymtabF, ShndxF, FileSymbolData[i],
+ ELF::STT_FILE | ELF::STB_LOCAL, 0, 0,
+ ELF::STV_DEFAULT, ELF::SHN_ABS, true);
+ }
+
// Write the symbol table entries.
- LastLocalSymbolIndex = LocalSymbolData.size() + 1;
+ LastLocalSymbolIndex = FileSymbolData.size() + LocalSymbolData.size() + 1;
+
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
ELFSymbolData &MSD = LocalSymbolData[i];
WriteSymbol(SymtabF, ShndxF, MSD, Layout);
@@ -880,6 +884,20 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
// FIXME: We could optimize suffixes in strtab in the same way we
// optimize them in shstrtab.
+ for (MCAssembler::const_file_name_iterator it = Asm.file_names_begin(),
+ ie = Asm.file_names_end();
+ it != ie;
+ ++it) {
+ StringRef Name = *it;
+ uint64_t &Entry = StringIndexMap[Name];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Name;
+ StringTable += '\x00';
+ }
+ FileSymbolData.push_back(Entry);
+ }
+
// Add the data for the symbols.
for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
ie = Asm.symbol_end(); it != ie; ++it) {
@@ -964,7 +982,7 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
// Set the symbol indices. Local symbols must come before all other
// symbols with non-local bindings.
- unsigned Index = 1;
+ unsigned Index = FileSymbolData.size() + 1;
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
LocalSymbolData[i].SymbolData->setIndex(Index++);
@@ -1005,8 +1023,8 @@ void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
unsigned Flags = 0;
StringRef Group = "";
if (Section.getFlags() & ELF::SHF_GROUP) {
- Flags = ELF::SHF_GROUP;
- Group = Section.getGroup()->getName();
+ Flags = ELF::SHF_GROUP;
+ Group = Section.getGroup()->getName();
}
const MCSectionELF *RelaSection =
@@ -1073,7 +1091,7 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
else if (entry.Index < 0)
entry.Index = getSymbolIndexInSymbolTable(Asm, entry.Symbol);
else
- entry.Index += LocalSymbolData.size();
+ entry.Index += FileSymbolData.size() + LocalSymbolData.size();
if (is64Bit()) {
String64(*F, entry.r_offset);
if (TargetObjectWriter->isN64()) {
@@ -1104,11 +1122,10 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
}
}
-static int compareBySuffix(const void *a, const void *b) {
- const MCSectionELF *secA = *static_cast<const MCSectionELF* const *>(a);
- const MCSectionELF *secB = *static_cast<const MCSectionELF* const *>(b);
- const StringRef &NameA = secA->getSectionName();
- const StringRef &NameB = secB->getSectionName();
+static int compareBySuffix(const MCSectionELF *const *a,
+ const MCSectionELF *const *b) {
+ const StringRef &NameA = (*a)->getSectionName();
+ const StringRef &NameB = (*b)->getSectionName();
const unsigned sizeA = NameA.size();
const unsigned sizeB = NameB.size();
const unsigned len = std::min(sizeA, sizeB);
@@ -1299,10 +1316,12 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
// Remove ".rel" and ".rela" prefixes.
unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+ StringRef GroupName =
+ Section.getGroup() ? Section.getGroup()->getName() : "";
- InfoSection = Asm.getContext().getELFSection(SectionName,
- ELF::SHT_PROGBITS, 0,
- SectionKind::getReadOnly());
+ InfoSection = Asm.getContext().getELFSection(SectionName, ELF::SHT_PROGBITS,
+ 0, SectionKind::getReadOnly(),
+ 0, GroupName);
sh_info = SectionIndexMap.lookup(InfoSection);
break;
}
@@ -1352,11 +1371,12 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
SectionKind::getText()));
} else if (SecName.startswith(".ARM.exidx")) {
- sh_link = SectionIndexMap.lookup(
- Asm.getContext().getELFSection(SecName.substr(sizeof(".ARM.exidx") - 1),
- ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
- SectionKind::getText()));
+ StringRef GroupName =
+ Section.getGroup() ? Section.getGroup()->getName() : "";
+ sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection(
+ SecName.substr(sizeof(".ARM.exidx") - 1), ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, SectionKind::getText(), 0,
+ GroupName));
}
}
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 84e4075..28f1c95 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -35,7 +35,7 @@ MCAsmInfo::MCAsmInfo() {
LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
MinInstAlignment = 1;
- PCSymbol = "$";
+ DollarIsPC = false;
SeparatorString = ";";
CommentColumn = 40;
CommentString = "#";
@@ -50,10 +50,7 @@ MCAsmInfo::MCAsmInfo() {
Code32Directive = ".code32";
Code64Directive = ".code64";
AssemblerDialect = 0;
- AllowQuotesInName = false;
- AllowNameToStartWithDigit = false;
- AllowPeriodsInName = true;
- AllowUTF8 = true;
+ AllowAtInName = false;
UseDataRegionDirectives = false;
ZeroDirective = "\t.zero\t";
AsciiDirective = "\t.ascii\t";
@@ -76,8 +73,8 @@ MCAsmInfo::MCAsmInfo() {
LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;
HasDotTypeDotSizeDirective = true;
HasSingleParameterDotFile = true;
+ HasIdentDirective = false;
HasNoDeadStrip = false;
- HasSymbolResolver = false;
WeakRefDirective = 0;
WeakDefDirective = 0;
LinkOnceDirective = 0;
@@ -87,7 +84,6 @@ MCAsmInfo::MCAsmInfo() {
HasLEB128 = false;
SupportsDebugInformation = false;
ExceptionsType = ExceptionHandling::None;
- DwarfUsesInlineInfoSection = false;
DwarfUsesRelocationsAcrossSections = true;
DwarfRegNumForCFI = false;
HasMicrosoftFastStdCallMangling = false;
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 33350d9..9d9f98e 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -43,7 +43,6 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
void MCAsmInfoMicrosoft::anchor() { }
MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
- AllowQuotesInName = true;
}
void MCAsmInfoGNUCOFF::anchor() { }
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index a0e3eba..704c816 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -26,7 +26,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
GlobalPrefix = "_";
PrivateGlobalPrefix = "L";
LinkerPrivateGlobalPrefix = "l";
- AllowQuotesInName = true;
HasSingleParameterDotFile = false;
HasSubsectionsViaSymbols = true;
@@ -58,7 +57,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
- HasSymbolResolver = true;
DwarfUsesRelocationsAcrossSections = false;
}
diff --git a/lib/MC/MCAsmInfoELF.cpp b/lib/MC/MCAsmInfoELF.cpp
new file mode 100644
index 0000000..8cf4e4f
--- /dev/null
+++ b/lib/MC/MCAsmInfoELF.cpp
@@ -0,0 +1,23 @@
+//===-- MCAsmInfoELF.cpp - ELF asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on ELF-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoELF.h"
+using namespace llvm;
+
+void MCAsmInfoELF::anchor() { }
+
+MCAsmInfoELF::MCAsmInfoELF() {
+ HasIdentDirective = true;
+ WeakRefDirective = "\t.weak\t";
+}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 781e400..ca49f8f 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
@@ -65,17 +66,15 @@ private:
virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame);
public:
- MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
- bool isVerboseAsm, bool useLoc, bool useCFI,
- bool useDwarfDirectory,
- MCInstPrinter *printer, MCCodeEmitter *emitter,
- MCAsmBackend *asmbackend,
- bool showInst)
- : MCStreamer(SK_AsmStreamer, Context), OS(os), MAI(Context.getAsmInfo()),
- InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
- CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
- ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI),
- UseDwarfDirectory(useDwarfDirectory) {
+ MCAsmStreamer(MCContext &Context, MCTargetStreamer *TargetStreamer,
+ formatted_raw_ostream &os, bool isVerboseAsm, bool useLoc,
+ bool useCFI, bool useDwarfDirectory, MCInstPrinter *printer,
+ MCCodeEmitter *emitter, MCAsmBackend *asmbackend, bool showInst)
+ : MCStreamer(Context, TargetStreamer), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
+ CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI),
+ UseDwarfDirectory(useDwarfDirectory) {
if (InstPrinter && IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
}
@@ -154,7 +153,7 @@ public:
virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
const MCSymbol *Label);
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
@@ -214,6 +213,7 @@ public:
unsigned Isa, unsigned Discriminator,
StringRef FileName);
+ virtual void EmitIdent(StringRef IdentString);
virtual void EmitCFISections(bool EH, bool Debug);
virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
virtual void EmitCFIDefCfaOffset(int64_t Offset);
@@ -229,6 +229,7 @@ public:
virtual void EmitCFISignalFrame();
virtual void EmitCFIUndefined(int64_t Register);
virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
+ virtual void EmitCFIWindowSave();
virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
virtual void EmitWin64EHEndProc();
@@ -245,17 +246,6 @@ public:
virtual void EmitWin64EHPushFrame(bool Code);
virtual void EmitWin64EHEndProlog();
- virtual void EmitFnStart();
- virtual void EmitFnEnd();
- virtual void EmitCantUnwind();
- virtual void EmitPersonality(const MCSymbol *Personality);
- virtual void EmitHandlerData();
- virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
- virtual void EmitPad(int64_t Offset);
- virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
-
- virtual void EmitTCEntry(const MCSymbol &S);
-
virtual void EmitInstruction(const MCInst &Inst);
virtual void EmitBundleAlignMode(unsigned AlignPow2);
@@ -265,15 +255,9 @@ public:
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
- virtual void EmitRawText(StringRef String);
+ virtual void EmitRawTextImpl(StringRef String);
virtual void FinishImpl();
-
- /// @}
-
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_AsmStreamer;
- }
};
} // end anonymous namespace.
@@ -436,7 +420,7 @@ void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
}
-void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
switch (Attribute) {
case MCSA_Invalid: llvm_unreachable("Invalid symbol attribute");
@@ -447,11 +431,12 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common
case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype
case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object
- assert(MAI->hasDotTypeDotSizeDirective() && "Symbol Attr not supported");
+ if (!MAI->hasDotTypeDotSizeDirective())
+ return false; // Symbol attribute not supported
OS << "\t.type\t" << *Symbol << ','
<< ((MAI->getCommentString()[0] != '@') ? '@' : '%');
switch (Attribute) {
- default: llvm_unreachable("Unknown ELF .type");
+ default: return false;
case MCSA_ELF_TypeFunction: OS << "function"; break;
case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break;
case MCSA_ELF_TypeObject: OS << "object"; break;
@@ -461,7 +446,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_ELF_TypeGnuUniqueObject: OS << "gnu_unique_object"; break;
}
EmitEOL();
- return;
+ return true;
case MCSA_Global: // .globl/.global
OS << MAI->getGlobalDirective();
FlagMap[Symbol] |= EHGlobal;
@@ -491,6 +476,8 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
OS << *Symbol;
EmitEOL();
+
+ return true;
}
void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
@@ -530,6 +517,9 @@ void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
+ // Common symbols do not belong to any actual section.
+ AssignSection(Symbol, NULL);
+
OS << "\t.comm\t" << *Symbol << ',' << Size;
if (ByteAlignment != 0) {
if (MAI->getCOMMDirectiveAlignmentIsInBytes())
@@ -546,6 +536,9 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
/// @param Size - The size of the common symbol.
void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlign) {
+ // Common symbols do not belong to any actual section.
+ AssignSection(Symbol, NULL);
+
OS << "\t.lcomm\t" << *Symbol << ',' << Size;
if (ByteAlign > 1) {
switch (MAI->getLCOMMDirectiveAlignmentType()) {
@@ -565,6 +558,9 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
+ if (Symbol)
+ AssignSection(Symbol, Section);
+
// Note: a .zerofill directive does not switch sections.
OS << ".zerofill ";
@@ -585,6 +581,8 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
// e.g. _a.
void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
+ AssignSection(Symbol, Section);
+
assert(Symbol != NULL && "Symbol shouldn't be NULL!");
// Instead of using the Section we'll just use the shortcut.
// This is a mach-o specific directive and section.
@@ -654,7 +652,6 @@ void MCAsmStreamer::EmitBytes(StringRef Data) {
OS << MAI->getAsciiDirective();
}
- OS << ' ';
PrintQuotedString(Data, OS);
EmitEOL();
}
@@ -884,6 +881,13 @@ void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
EmitEOL();
}
+void MCAsmStreamer::EmitIdent(StringRef IdentString) {
+ assert(MAI->hasIdentDirective() && ".ident directive not supported");
+ OS << "\t.ident\t";
+ PrintQuotedString(IdentString, OS);
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
MCStreamer::EmitCFISections(EH, Debug);
@@ -1085,6 +1089,16 @@ void MCAsmStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
EmitEOL();
}
+void MCAsmStreamer::EmitCFIWindowSave() {
+ MCStreamer::EmitCFIWindowSave();
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_window_save";
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
MCStreamer::EmitWin64EHStartProc(Symbol);
@@ -1290,73 +1304,6 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
}
}
-void MCAsmStreamer::EmitFnStart() {
- OS << "\t.fnstart";
- EmitEOL();
-}
-
-void MCAsmStreamer::EmitFnEnd() {
- OS << "\t.fnend";
- EmitEOL();
-}
-
-void MCAsmStreamer::EmitCantUnwind() {
- OS << "\t.cantunwind";
- EmitEOL();
-}
-
-void MCAsmStreamer::EmitHandlerData() {
- OS << "\t.handlerdata";
- EmitEOL();
-}
-
-void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) {
- OS << "\t.personality " << Personality->getName();
- EmitEOL();
-}
-
-void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
- OS << "\t.setfp\t";
- InstPrinter->printRegName(OS, FpReg);
- OS << ", ";
- InstPrinter->printRegName(OS, SpReg);
- if (Offset)
- OS << ", #" << Offset;
- EmitEOL();
-}
-
-void MCAsmStreamer::EmitPad(int64_t Offset) {
- OS << "\t.pad\t#" << Offset;
- EmitEOL();
-}
-
-void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
- bool isVector) {
- assert(RegList.size() && "RegList should not be empty");
- if (isVector)
- OS << "\t.vsave\t{";
- else
- OS << "\t.save\t{";
-
- InstPrinter->printRegName(OS, RegList[0]);
-
- for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
- OS << ", ";
- InstPrinter->printRegName(OS, RegList[i]);
- }
-
- OS << "}";
- EmitEOL();
-}
-
-void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
- OS << "\t.tc ";
- OS << S.getName();
- OS << "[TC],";
- OS << S.getName();
- EmitEOL();
-}
-
void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
assert(getCurrentSection().first &&
"Cannot emit contents before setting section!");
@@ -1399,7 +1346,7 @@ void MCAsmStreamer::EmitBundleUnlock() {
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
-void MCAsmStreamer::EmitRawText(StringRef String) {
+void MCAsmStreamer::EmitRawTextImpl(StringRef String) {
if (!String.empty() && String.back() == '\n')
String = String.substr(0, String.size()-1);
OS << String;
@@ -1418,14 +1365,16 @@ void MCAsmStreamer::FinishImpl() {
MCGenDwarfInfo::Emit(this, LineSectionSymbol);
if (!UseCFI)
- EmitFrames(false);
+ EmitFrames(AsmBackend.get(), false);
}
+
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
+ MCTargetStreamer *TargetStreamer,
formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useLoc,
- bool useCFI, bool useDwarfDirectory,
- MCInstPrinter *IP, MCCodeEmitter *CE,
- MCAsmBackend *MAB, bool ShowInst) {
- return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
- useDwarfDirectory, IP, CE, MAB, ShowInst);
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory, MCInstPrinter *IP,
+ MCCodeEmitter *CE, MCAsmBackend *MAB,
+ bool ShowInst) {
+ return new MCAsmStreamer(Context, TargetStreamer, OS, isVerboseAsm, useLoc,
+ useCFI, useDwarfDirectory, IP, CE, MAB, ShowInst);
}
diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAtom.cpp
index 2626b39..bc353cd 100644
--- a/lib/MC/MCAtom.cpp
+++ b/lib/MC/MCAtom.cpp
@@ -14,6 +14,9 @@
using namespace llvm;
+// Pin the vtable to this file.
+void MCAtom::anchor() {}
+
void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
Parent->remap(this, NewBegin, NewEnd);
}
@@ -44,7 +47,7 @@ void MCAtom::remapForSplit(uint64_t SplitPt,
void MCDataAtom::addData(const MCData &D) {
Data.push_back(D);
- if (Data.size() > Begin - End)
+ if (Data.size() > End + 1 - Begin)
remap(Begin, End + 1);
}
@@ -72,8 +75,8 @@ MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
// MCTextAtom
void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
- if (NextInstAddress > End)
- remap(Begin, NextInstAddress);
+ if (NextInstAddress + Size - 1 > End)
+ remap(Begin, NextInstAddress + Size - 1);
Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
NextInstAddress += Size;
}
@@ -106,5 +109,6 @@ MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
std::copy(I, Insts.end(), std::back_inserter(RightAtom->Insts));
Insts.erase(I, Insts.end());
+ Parent->splitBasicBlocksForAtom(this, RightAtom);
return RightAtom;
}
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 6e4d82b..3b45d16 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -25,12 +25,16 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
+
+#include <map>
+
using namespace llvm;
-typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
-typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
-typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
+typedef std::pair<std::string, std::string> SectionGroupPair;
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+typedef std::map<SectionGroupPair, const MCSectionELF *> ELFUniqueMapTy;
+typedef std::map<SectionGroupPair, const MCSectionCOFF *> COFFUniqueMapTy;
MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
const MCObjectFileInfo *mofi, const SourceMgr *mgr,
@@ -249,8 +253,9 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
// Do the lookup, if we have a hit, return it.
- StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
- if (Entry.getValue()) return Entry.getValue();
+ std::pair<ELFUniqueMapTy::iterator, bool> Entry = Map.insert(
+ std::make_pair(SectionGroupPair(Section, Group), (MCSectionELF *)0));
+ if (!Entry.second) return Entry.first->second;
// Possibly refine the entry size first.
if (!EntrySize) {
@@ -261,9 +266,9 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
if (!Group.empty())
GroupSym = GetOrCreateSymbol(Group);
- MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
- Kind, EntrySize, GroupSym);
- Entry.setValue(Result);
+ MCSectionELF *Result = new (*this) MCSectionELF(
+ Entry.first->first.first, Type, Flags, Kind, EntrySize, GroupSym);
+ Entry.first->second = Result;
return Result;
}
@@ -274,32 +279,51 @@ const MCSectionELF *MCContext::CreateELFGroupSection() {
return Result;
}
-const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
- unsigned Characteristics,
- SectionKind Kind, int Selection,
- const MCSectionCOFF *Assoc) {
+const MCSectionCOFF *
+MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
+ SectionKind Kind, StringRef COMDATSymName,
+ int Selection, const MCSectionCOFF *Assoc) {
if (COFFUniquingMap == 0)
COFFUniquingMap = new COFFUniqueMapTy();
COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
// Do the lookup, if we have a hit, return it.
- StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
- if (Entry.getValue()) return Entry.getValue();
- MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
- Characteristics,
- Selection, Assoc, Kind);
+ SectionGroupPair P(Section, COMDATSymName);
+ std::pair<COFFUniqueMapTy::iterator, bool> Entry =
+ Map.insert(std::make_pair(P, (MCSectionCOFF *)0));
+ COFFUniqueMapTy::iterator Iter = Entry.first;
+ if (!Entry.second)
+ return Iter->second;
- Entry.setValue(Result);
+ const MCSymbol *COMDATSymbol = NULL;
+ if (!COMDATSymName.empty())
+ COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
+
+ MCSectionCOFF *Result =
+ new (*this) MCSectionCOFF(Iter->first.first, Characteristics,
+ COMDATSymbol, Selection, Assoc, Kind);
+
+ Iter->second = Result;
return Result;
}
+const MCSectionCOFF *
+MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
+ SectionKind Kind) {
+ return getCOFFSection(Section, Characteristics, Kind, "", 0);
+}
+
const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
if (COFFUniquingMap == 0)
COFFUniquingMap = new COFFUniqueMapTy();
COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
- return Map.lookup(Section);
+ SectionGroupPair P(Section, "");
+ COFFUniqueMapTy::iterator Iter = Map.find(P);
+ if (Iter == Map.end())
+ return 0;
+ return Iter->second;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index ecc7aff..a0066c8 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -20,6 +20,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolizer.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
@@ -101,6 +102,7 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
if (!DC)
return 0;
+ DC->setCPU(CPU);
return DC;
}
@@ -143,6 +145,112 @@ public:
};
} // end anonymous namespace
+/// \brief Emits the comments that are stored in the \p DC comment stream.
+/// Each comment in the comment stream must end with a newline.
+static void emitComments(LLVMDisasmContext *DC,
+ formatted_raw_ostream &FormattedOS) {
+ // Flush the stream before taking its content.
+ DC->CommentStream.flush();
+ StringRef Comments = DC->CommentsToEmit.str();
+ // Get the default information for printing a comment.
+ const MCAsmInfo *MAI = DC->getAsmInfo();
+ const char *CommentBegin = MAI->getCommentString();
+ unsigned CommentColumn = MAI->getCommentColumn();
+ bool IsFirst = true;
+ while (!Comments.empty()) {
+ if (!IsFirst)
+ FormattedOS << '\n';
+ // Emit a line of comments.
+ FormattedOS.PadToColumn(CommentColumn);
+ size_t Position = Comments.find('\n');
+ FormattedOS << CommentBegin << ' ' << Comments.substr(0, Position);
+ // Move after the newline character.
+ Comments = Comments.substr(Position+1);
+ IsFirst = false;
+ }
+ FormattedOS.flush();
+
+ // Tell the comment stream that the vector changed underneath it.
+ DC->CommentsToEmit.clear();
+ DC->CommentStream.resync();
+}
+
+/// \brief Gets latency information for \p Inst from the itinerary
+/// scheduling model, based on \p DC information.
+/// \return The maximum expected latency over all the operands or -1
+/// if no information is available.
+static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
+ const int NoInformationAvailable = -1;
+
+ // Check if we have a CPU to get the itinerary information.
+ if (DC->getCPU().empty())
+ return NoInformationAvailable;
+
+ // Get itinerary information.
+ const MCSubtargetInfo *STI = DC->getSubtargetInfo();
+ InstrItineraryData IID = STI->getInstrItineraryForCPU(DC->getCPU());
+ // Get the scheduling class of the requested instruction.
+ const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode());
+ unsigned SCClass = Desc.getSchedClass();
+
+ int Latency = 0;
+ for (unsigned OpIdx = 0, OpIdxEnd = Inst.getNumOperands(); OpIdx != OpIdxEnd;
+ ++OpIdx)
+ Latency = std::max(Latency, IID.getOperandCycle(SCClass, OpIdx));
+
+ return Latency;
+}
+
+/// \brief Gets latency information for \p Inst, based on \p DC information.
+/// \return The maximum expected latency over all the definitions or -1
+/// if no information is available.
+static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
+ // Try to compute scheduling information.
+ const MCSubtargetInfo *STI = DC->getSubtargetInfo();
+ const MCSchedModel *SCModel = STI->getSchedModel();
+ const int NoInformationAvailable = -1;
+
+ // Check if we have a scheduling model for instructions.
+ if (!SCModel || !SCModel->hasInstrSchedModel())
+ // Try to fall back to the itinerary model if we do not have a
+ // scheduling model.
+ return getItineraryLatency(DC, Inst);
+
+ // Get the scheduling class of the requested instruction.
+ const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode());
+ unsigned SCClass = Desc.getSchedClass();
+ const MCSchedClassDesc *SCDesc = SCModel->getSchedClassDesc(SCClass);
+ // Resolving the variant SchedClass requires an MI to pass to
+ // SubTargetInfo::resolveSchedClass.
+ if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant())
+ return NoInformationAvailable;
+
+ // Compute output latency.
+ int Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc,
+ DefIdx);
+ Latency = std::max(Latency, WLEntry->Cycles);
+ }
+
+ return Latency;
+}
+
+
+/// \brief Emits latency information in DC->CommentStream for \p Inst, based
+/// on the information available in \p DC.
+static void emitLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
+ int Latency = getLatency(DC, Inst);
+
+ // Report only interesting latency.
+ if (Latency < 2)
+ return;
+
+ DC->CommentStream << "Latency: " << Latency << '\n';
+}
+
//
// LLVMDisasmInstruction() disassembles a single instruction using the
// disassembler context specified in the parameter DC. The bytes of the
@@ -167,8 +275,10 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
const MCDisassembler *DisAsm = DC->getDisAsm();
MCInstPrinter *IP = DC->getIP();
MCDisassembler::DecodeStatus S;
+ SmallVector<char, 64> InsnStr;
+ raw_svector_ostream Annotations(InsnStr);
S = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
- /*REMOVE*/ nulls(), DC->CommentStream);
+ /*REMOVE*/ nulls(), Annotations);
switch (S) {
case MCDisassembler::Fail:
case MCDisassembler::SoftFail:
@@ -176,17 +286,18 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
return 0;
case MCDisassembler::Success: {
- DC->CommentStream.flush();
- StringRef Comments = DC->CommentsToEmit.str();
+ Annotations.flush();
+ StringRef AnnotationsStr = Annotations.str();
SmallVector<char, 64> InsnStr;
raw_svector_ostream OS(InsnStr);
- IP->printInst(&Inst, OS, Comments);
- OS.flush();
+ formatted_raw_ostream FormattedOS(OS);
+ IP->printInst(&Inst, FormattedOS, AnnotationsStr);
- // Tell the comment stream that the vector changed underneath it.
- DC->CommentsToEmit.clear();
- DC->CommentStream.resync();
+ if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency)
+ emitLatency(DC, Inst);
+
+ emitComments(DC, FormattedOS);
assert(OutStringSize != 0 && "Output buffer cannot be zero size");
size_t OutputSize = std::min(OutStringSize-1, InsnStr.size());
@@ -208,12 +319,14 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
MCInstPrinter *IP = DC->getIP();
IP->setUseMarkup(1);
+ DC->addOptions(LLVMDisassembler_Option_UseMarkup);
Options &= ~LLVMDisassembler_Option_UseMarkup;
}
if (Options & LLVMDisassembler_Option_PrintImmHex){
LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
MCInstPrinter *IP = DC->getIP();
IP->setPrintImmHex(1);
+ DC->addOptions(LLVMDisassembler_Option_PrintImmHex);
Options &= ~LLVMDisassembler_Option_PrintImmHex;
}
if (Options & LLVMDisassembler_Option_AsmPrinterVariant){
@@ -229,8 +342,21 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
AsmPrinterVariant, *MAI, *MII, *MRI, *STI);
if (IP) {
DC->setIP(IP);
+ DC->addOptions(LLVMDisassembler_Option_AsmPrinterVariant);
Options &= ~LLVMDisassembler_Option_AsmPrinterVariant;
}
}
+ if (Options & LLVMDisassembler_Option_SetInstrComments) {
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ MCInstPrinter *IP = DC->getIP();
+ IP->setCommentStream(DC->CommentStream);
+ DC->addOptions(LLVMDisassembler_Option_SetInstrComments);
+ Options &= ~LLVMDisassembler_Option_SetInstrComments;
+ }
+ if (Options & LLVMDisassembler_Option_PrintLatency) {
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ DC->addOptions(LLVMDisassembler_Option_PrintLatency);
+ Options &= ~LLVMDisassembler_Option_PrintLatency;
+ }
return (Options == 0);
}
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index 6eb59d0..4855af2 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -73,6 +73,10 @@ private:
llvm::OwningPtr<const llvm::MCDisassembler> DisAsm;
// The instruction printer for the target architecture.
llvm::OwningPtr<llvm::MCInstPrinter> IP;
+ // The options used to set up the disassembler.
+ uint64_t Options;
+ // The CPU string.
+ std::string CPU;
public:
// Comment stream and backing vector.
@@ -90,6 +94,7 @@ public:
MCInstPrinter *iP) : TripleName(tripleName),
DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo),
SymbolLookUp(symbolLookUp), TheTarget(theTarget),
+ Options(0),
CommentStream(CommentsToEmit) {
MAI.reset(mAI);
MRI.reset(mRI);
@@ -114,6 +119,10 @@ public:
const MCSubtargetInfo *getSubtargetInfo() const { return MSI.get(); }
MCInstPrinter *getIP() { return IP.get(); }
void setIP(MCInstPrinter *NewIP) { IP.reset(NewIP); }
+ uint64_t getOptions() const { return Options; }
+ void addOptions(uint64_t Options) { this->Options |= Options; }
+ StringRef getCPU() const { return CPU; }
+ void setCPU(const char *CPU) { this->CPU = CPU; }
};
} // namespace llvm
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 195bbfe..1e5c2e3 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -873,9 +873,7 @@ namespace {
void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
- /// EmitCompactUnwind - Emit the unwind information in a compact way. If
- /// we're successful, return 'true'. Otherwise, return 'false' and it will
- /// emit the normal CIE and FDE.
+ /// EmitCompactUnwind - Emit the unwind information in a compact way.
void EmitCompactUnwind(MCStreamer &streamer,
const MCDwarfFrameInfo &frame);
@@ -889,7 +887,7 @@ namespace {
const MCSymbol &cieStart,
const MCDwarfFrameInfo &frame);
void EmitCFIInstructions(MCStreamer &streamer,
- const std::vector<MCCFIInstruction> &Instrs,
+ ArrayRef<MCCFIInstruction> Instrs,
MCSymbol *BaseLabel);
void EmitCFIInstruction(MCStreamer &Streamer,
const MCCFIInstruction &Instr);
@@ -961,6 +959,10 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
Streamer.EmitULEB128IntValue(Reg2);
return;
}
+ case MCCFIInstruction::OpWindowSave: {
+ Streamer.EmitIntValue(dwarf::DW_CFA_GNU_window_save, 1);
+ return;
+ }
case MCCFIInstruction::OpUndefined: {
unsigned Reg = Instr.getRegister();
if (VerboseAsm) {
@@ -1091,7 +1093,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
/// EmitFrameMoves - Emit frame instructions to describe the layout of the
/// frame.
void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
- const std::vector<MCCFIInstruction> &Instrs,
+ ArrayRef<MCCFIInstruction> Instrs,
MCSymbol *BaseLabel) {
for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
const MCCFIInstruction &Instr = Instrs[i];
@@ -1113,9 +1115,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
}
}
-/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
-/// successful, return 'true'. Otherwise, return 'false' and it will emit the
-/// normal CIE and FDE.
+/// EmitCompactUnwind - Emit the unwind information in a compact way.
void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
const MCDwarfFrameInfo &Frame) {
MCContext &Context = Streamer.getContext();
@@ -1419,9 +1419,10 @@ namespace llvm {
};
}
-void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
- bool UsingCFI,
- bool IsEH) {
+void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB,
+ bool UsingCFI, bool IsEH) {
+ Streamer.generateCompactUnwindEncodings(MAB);
+
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
FrameEmitterImpl Emitter(UsingCFI, IsEH);
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
index 560cdbc..ebb189e 100644
--- a/lib/MC/MCELF.cpp
+++ b/lib/MC/MCELF.cpp
@@ -36,8 +36,8 @@ unsigned MCELF::GetBinding(const MCSymbolData &SD) {
void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
- Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
+ Type == ELF::STT_COMMON || Type == ELF::STT_TLS ||
+ Type == ELF::STT_GNU_IFUNC);
uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
@@ -47,8 +47,7 @@ unsigned MCELF::GetType(const MCSymbolData &SD) {
uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
- Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
+ Type == ELF::STT_COMMON || Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
return Type;
}
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index ec7397d..0c39e4a 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -42,9 +42,9 @@ const MCSymbol *MCELFObjectTargetWriter::undefinedExplicitRelSym(const MCValue &
// ELF doesn't require relocations to be in any order. We sort by the r_offset,
// just to match gnu as for easier comparison. The use type and index is an
// arbitrary way of making the sort deterministic.
-static int cmpRel(const void *AP, const void *BP) {
- const ELFRelocationEntry &A = *(const ELFRelocationEntry *)AP;
- const ELFRelocationEntry &B = *(const ELFRelocationEntry *)BP;
+static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
+ const ELFRelocationEntry &A = *AP;
+ const ELFRelocationEntry &B = *BP;
if (A.r_offset != B.r_offset)
return B.r_offset - A.r_offset;
if (B.Type != A.Type)
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 6e5ff50..e806cb9 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
@@ -96,6 +97,9 @@ void MCELFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
}
void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ // Let the target do whatever target specific stuff it needs to do.
+ getAssembler().getBackend().handleAssemblerFlag(Flag);
+ // Do any generic stuff we need to do.
switch (Flag) {
case MCAF_SyntaxUnified: return; // no-op here.
case MCAF_Code16: return; // Change parsing mode; no-op here.
@@ -148,8 +152,8 @@ static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) {
return T2;
}
-void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
- MCSymbolAttr Attribute) {
+bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
// Indirect symbols are handled differently, to match how 'as' handles
// them. This makes writing matching .o files easier.
if (Attribute == MCSA_IndirectSymbol) {
@@ -159,7 +163,7 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
ISD.Symbol = Symbol;
ISD.SectionData = getCurrentSectionData();
getAssembler().getIndirectSymbols().push_back(ISD);
- return;
+ return true;
}
// Adding a symbol attribute always introduces the symbol, note that an
@@ -182,7 +186,7 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_WeakDefAutoPrivate:
case MCSA_Invalid:
case MCSA_IndirectSymbol:
- llvm_unreachable("Invalid symbol attribute for ELF!");
+ return false;
case MCSA_NoDeadStrip:
case MCSA_ELF_TypeGnuUniqueObject:
@@ -251,6 +255,8 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCELF::SetVisibility(SD, ELF::STV_INTERNAL);
break;
}
+
+ return true;
}
void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -270,7 +276,8 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
ELF::SHF_WRITE |
ELF::SHF_ALLOC,
SectionKind::getBSS());
- Symbol->setSection(*Section);
+
+ AssignSection(Symbol, Section);
struct LocalCommon L = {&SD, Size, ByteAlignment};
LocalCommons.push_back(L);
@@ -313,20 +320,29 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
ValueSize, MaxBytesToEmit);
}
-
-// Add a symbol for the file name of this module. This is the second
-// entry in the module's symbol table (the first being the null symbol).
+// Add a symbol for the file name of this module. These symbols start after
+// the null symbol and don't count as normal symbols, i.e. a non-STT_FILE
+// symbol with the same name may appear.
void MCELFStreamer::EmitFileDirective(StringRef Filename) {
- MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
- Symbol->setSection(*getCurrentSection().first);
- Symbol->setAbsolute();
-
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-
- SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default);
+ getAssembler().addFileName(Filename);
+}
+
+void MCELFStreamer::EmitIdent(StringRef IdentString) {
+ const MCSection *Comment = getAssembler().getContext().getELFSection(
+ ".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS,
+ SectionKind::getReadOnly(), 1, "");
+ PushSection();
+ SwitchSection(Comment);
+ if (!SeenIdent) {
+ EmitIntValue(0, 1);
+ SeenIdent = true;
+ }
+ EmitBytes(IdentString);
+ EmitIntValue(0, 1);
+ PopSection();
}
-void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
+void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
switch (expr->getKind()) {
case MCExpr::Target:
cast<MCTargetExpr>(expr)->fixELFSymbolsInTLSFixups(getAssembler());
@@ -525,9 +541,7 @@ void MCELFStreamer::EmitBundleUnlock() {
SD->setBundleLockState(MCSectionData::NotBundleLocked);
}
-void MCELFStreamer::FinishImpl() {
- EmitFrames(true);
-
+void MCELFStreamer::Flush() {
for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
e = LocalCommons.end();
i != e; ++i) {
@@ -548,17 +562,23 @@ void MCELFStreamer::FinishImpl() {
SectData.setAlignment(ByteAlignment);
}
- this->MCObjectStreamer::FinishImpl();
+ LocalCommons.clear();
}
-void MCELFStreamer::EmitTCEntry(const MCSymbol &S) {
- // Creates a R_PPC64_TOC relocation
- MCObjectStreamer::EmitSymbolValue(&S, 8);
+
+void MCELFStreamer::FinishImpl() {
+ EmitFrames(NULL, true);
+
+ Flush();
+
+ this->MCObjectStreamer::FinishImpl();
}
-MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll, bool NoExecStack) {
- MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE);
+MCStreamer *llvm::createELFStreamer(MCContext &Context,
+ MCTargetStreamer *Streamer,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *CE, bool RelaxAll,
+ bool NoExecStack) {
+ MCELFStreamer *S = new MCELFStreamer(Context, Streamer, MAB, OS, CE);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
if (NoExecStack)
diff --git a/lib/MC/MCExternalSymbolizer.cpp b/lib/MC/MCExternalSymbolizer.cpp
index 47ef6c4..ca368b2 100644
--- a/lib/MC/MCExternalSymbolizer.cpp
+++ b/lib/MC/MCExternalSymbolizer.cpp
@@ -63,6 +63,8 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
}
if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
cStream << "symbol stub for: " << ReferenceName;
+ else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message)
+ cStream << "Objc message: " << ReferenceName;
if (!Name && !IsBranch)
return false;
}
@@ -132,6 +134,8 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
// literal pool's entry if the referenced address is that of a symbol. Or it
// will return a pointer to a literal 'C' string if the referenced address of
// the literal pool's entry is an address into a section with C string literals.
+// Or if the reference is to an Objective-C data structure, it will return a
+// specific reference type for it and a string.
void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
int64_t Value,
uint64_t Address) {
@@ -139,9 +143,26 @@ void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
const char *ReferenceName;
(void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
- if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr ||
- ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
- cStream << "literal pool for: " << ReferenceName;
+ if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
+ cStream << "literal pool symbol address: " << ReferenceName;
+ else if(ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ cStream << "literal pool for: \"" << ReferenceName << "\"";
+ else if(ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
+ cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
+ else if(ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Message)
+ cStream << "Objc message: " << ReferenceName;
+ else if(ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
+ cStream << "Objc message ref: " << ReferenceName;
+ else if(ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
+ cStream << "Objc selector ref: " << ReferenceName;
+ else if(ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
+ cStream << "Objc class ref: " << ReferenceName;
}
}
diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp
index 2665d3e..767e1e0 100644
--- a/lib/MC/MCFunction.cpp
+++ b/lib/MC/MCFunction.cpp
@@ -9,15 +9,15 @@
#include "llvm/MC/MCFunction.h"
#include "llvm/MC/MCAtom.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCModule.h"
#include <algorithm>
using namespace llvm;
// MCFunction
-MCFunction::MCFunction(StringRef Name)
- : Name(Name)
+MCFunction::MCFunction(StringRef Name, MCModule *Parent)
+ : Name(Name), ParentModule(Parent)
{}
MCFunction::~MCFunction() {
@@ -26,18 +26,32 @@ MCFunction::~MCFunction() {
}
MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
- Blocks.push_back(new MCBasicBlock(TA, this));
- return *Blocks.back();
+ MCBasicBlock *MCBB = new MCBasicBlock(TA, this);
+ Blocks.push_back(MCBB);
+ return *MCBB;
+}
+
+MCBasicBlock *MCFunction::find(uint64_t StartAddr) {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if ((*I)->getInsts()->getBeginAddr() == StartAddr)
+ return *I;
+ return 0;
+}
+
+const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const {
+ return const_cast<MCFunction *>(this)->find(StartAddr);
}
// MCBasicBlock
MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent)
- : Insts(&Insts), Parent(Parent)
-{}
+ : Insts(&Insts), Parent(Parent) {
+ getParent()->getParent()->trackBBForAtom(&Insts, this);
+}
void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
- Successors.push_back(MCBB);
+ if (!isSuccessor(MCBB))
+ Successors.push_back(MCBB);
}
bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
@@ -46,10 +60,22 @@ bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
}
void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
- Predecessors.push_back(MCBB);
+ if (!isPredecessor(MCBB))
+ Predecessors.push_back(MCBB);
}
bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
return std::find(Predecessors.begin(), Predecessors.end(),
MCBB) != Predecessors.end();
}
+
+void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) {
+ assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() &&
+ "Splitting unrelated basic blocks!");
+ SplitBB->addPredecessor(this);
+ assert(SplitBB->Successors.empty() &&
+ "Split basic block shouldn't already have successors!");
+ SplitBB->Successors = Successors;
+ Successors.clear();
+ addSuccessor(SplitBB);
+}
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 6a452c8..ba71245 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -31,9 +31,13 @@ void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) {
if (!Annot.empty()) {
- if (CommentStream)
+ if (CommentStream) {
(*CommentStream) << Annot;
- else
+ // By definition (see MCInstPrinter.h), CommentStream must end with
+ // a newline after each comment.
+ if (Annot.back() != '\n')
+ (*CommentStream) << '\n';
+ } else
OS << " " << MAI.getCommentString() << " " << Annot;
}
}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 0729b7a..2924dcd 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -1,3 +1,4 @@
+//===-- MCMachOStreamer.cpp - MachO Streamer ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -36,7 +37,7 @@ private:
public:
MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
MCCodeEmitter *Emitter)
- : MCObjectStreamer(SK_MachOStreamer, Context, MAB, OS, Emitter) {}
+ : MCObjectStreamer(Context, 0, MAB, OS, Emitter) {}
/// @name MCStreamer Interface
/// @{
@@ -51,7 +52,7 @@ public:
virtual void EmitLinkerOptions(ArrayRef<std::string> Options);
virtual void EmitDataRegion(MCDataRegionType Kind);
virtual void EmitThumbFunc(MCSymbol *Func);
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
@@ -81,16 +82,14 @@ public:
// FIXME: Just ignore the .file; it isn't important enough to fail the
// entire assembly.
- //report_fatal_error("unsupported directive: '.file'");
+ // report_fatal_error("unsupported directive: '.file'");
}
- virtual void FinishImpl();
-
- /// @}
-
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_MachOStreamer;
+ virtual void EmitIdent(StringRef IdentString) {
+ llvm_unreachable("macho doesn't support this directive");
}
+
+ virtual void FinishImpl();
};
} // end anonymous namespace.
@@ -122,7 +121,7 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
// isSymbolLinkerVisible uses the section.
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
// We have to create a new fragment if this is an atom defining symbol,
// fragments cannot span atoms.
if (getAssembler().isSymbolLinkerVisible(*Symbol))
@@ -217,7 +216,7 @@ void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
SD.setFlags(SD.getFlags() | SF_ThumbFunc);
}
-void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
// Indirect symbols are handled differently, to match how 'as' handles
// them. This makes writing matching .o files easier.
@@ -228,7 +227,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
ISD.Symbol = Symbol;
ISD.SectionData = getCurrentSectionData();
getAssembler().getIndirectSymbols().push_back(ISD);
- return;
+ return true;
}
// Adding a symbol attribute always introduces the symbol, note that an
@@ -257,7 +256,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_Protected:
case MCSA_Weak:
case MCSA_Local:
- llvm_unreachable("Invalid symbol attribute for Mach-O!");
+ return false;
case MCSA_Global:
SD.setExternal(true);
@@ -309,6 +308,8 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference);
break;
}
+
+ return true;
}
void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
@@ -324,6 +325,8 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ AssignSection(Symbol, NULL);
+
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
SD.setExternal(true);
SD.setCommon(Size, ByteAlignment);
@@ -360,7 +363,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
SD.setFragment(F);
- Symbol->setSection(*Section);
+ AssignSection(Symbol, Section);
// Update the maximum alignment on the zero fill section if necessary.
if (ByteAlignment > SectData.getAlignment())
@@ -393,7 +396,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
}
void MCMachOStreamer::FinishImpl() {
- EmitFrames(true);
+ EmitFrames(&getAssembler().getBackend(), true);
// We have to set the fragment atom associations so we can relax properly for
// Mach-O.
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
index 5890b4b..7e9e18a 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCModule.cpp
@@ -18,6 +18,10 @@ static bool AtomComp(const MCAtom *L, uint64_t Addr) {
return L->getEndAddr() < Addr;
}
+static bool AtomCompInv(uint64_t Addr, const MCAtom *R) {
+ return Addr < R->getEndAddr();
+}
+
void MCModule::map(MCAtom *NewAtom) {
uint64_t Begin = NewAtom->Begin;
@@ -54,9 +58,13 @@ void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
assert(*I == Atom && "Previous atom mapping was invalid!");
Atoms.erase(I);
+ // FIXME: special case NewBegin == Atom->Begin
+
// Insert the new mapping.
AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
NewBegin, AtomComp);
+ assert((NewI == atom_end() || (*NewI)->getBeginAddr() > Atom->End)
+ && "Offset range already occupied!");
Atoms.insert(NewI, Atom);
// Update the atom internal bounds.
@@ -73,18 +81,55 @@ const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
}
MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
- AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
- Addr, AtomComp);
- if (I != atom_end() && (*I)->getBeginAddr() <= Addr)
+ return const_cast<MCAtom*>(
+ const_cast<const MCModule *>(this)->findAtomContaining(Addr));
+}
+
+const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const {
+ AtomListTy::const_iterator I = std::upper_bound(atom_begin(), atom_end(),
+ Addr, AtomCompInv);
+ if (I != atom_end())
return *I;
return 0;
}
-MCFunction *MCModule::createFunction(const StringRef &Name) {
- Functions.push_back(new MCFunction(Name));
+MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) {
+ return const_cast<MCAtom*>(
+ const_cast<const MCModule *>(this)->findFirstAtomAfter(Addr));
+}
+
+MCFunction *MCModule::createFunction(StringRef Name) {
+ Functions.push_back(new MCFunction(Name, this));
return Functions.back();
}
+static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) {
+ return BB->getInsts() < Atom;
+}
+
+void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA,
+ const MCTextAtom *NewTA) {
+ BBsByAtomTy::iterator
+ I = std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(),
+ TA, CompBBToAtom);
+ for (; I != BBsByAtom.end() && (*I)->getInsts() == TA; ++I) {
+ MCBasicBlock *BB = *I;
+ MCBasicBlock *NewBB = &BB->getParent()->createBlock(*NewTA);
+ BB->splitBasicBlock(NewBB);
+ }
+}
+
+void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) {
+ assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!");
+ BBsByAtomTy::iterator I = std::lower_bound(BBsByAtom.begin(),
+ BBsByAtom.end(),
+ Atom, CompBBToAtom);
+ for (; I != BBsByAtom.end() && (*I)->getInsts() == Atom; ++I)
+ if (*I == BB)
+ return;
+ BBsByAtom.insert(I, BB);
+}
+
MCModule::~MCModule() {
for (AtomListTy::iterator AI = atom_begin(),
AE = atom_end();
diff --git a/lib/MC/MCModuleYAML.cpp b/lib/MC/MCModuleYAML.cpp
new file mode 100644
index 0000000..e2de578
--- /dev/null
+++ b/lib/MC/MCModuleYAML.cpp
@@ -0,0 +1,461 @@
+//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of MCModule.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCModuleYAML.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Object/YAML.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+
+namespace llvm {
+
+namespace {
+
+// This class is used to map opcode and register names to enum values.
+//
+// There are at least 3 obvious ways to do this:
+// 1- Generate an MII/MRI method using a tablegen StringMatcher
+// 2- Write an MII/MRI method using std::lower_bound and the assumption that
+// the enums are sorted (starting at a fixed value).
+// 3- Do the matching manually as is done here.
+//
+// Why 3?
+// 1- A StringMatcher function for thousands of entries would incur
+// a non-negligible binary size overhead.
+// 2- The lower_bound comparators would be somewhat involved and aren't
+// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h)
+// 3- This isn't actually something useful outside tests (but the same argument
+// can be made against having {MII,MRI}::getName).
+//
+// If this becomes useful outside this specific situation, feel free to do
+// the Right Thing (tm) and move the functionality to MII/MRI.
+//
+class InstrRegInfoHolder {
+ typedef StringMap<unsigned, BumpPtrAllocator> EnumValByNameTy;
+ EnumValByNameTy InstEnumValueByName;
+ EnumValByNameTy RegEnumValueByName;
+
+public:
+ const MCInstrInfo &MII;
+ const MCRegisterInfo &MRI;
+ InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI)
+ : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())),
+ RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) {
+ for (int i = 0, e = MII.getNumOpcodes(); i != e; ++i)
+ InstEnumValueByName[MII.getName(i)] = i;
+ for (int i = 0, e = MRI.getNumRegs(); i != e; ++i)
+ RegEnumValueByName[MRI.getName(i)] = i;
+ }
+
+ bool matchRegister(StringRef Name, unsigned &Reg) {
+ EnumValByNameTy::const_iterator It = RegEnumValueByName.find(Name);
+ if (It == RegEnumValueByName.end())
+ return false;
+ Reg = It->getValue();
+ return true;
+ }
+ bool matchOpcode(StringRef Name, unsigned &Opc) {
+ EnumValByNameTy::const_iterator It = InstEnumValueByName.find(Name);
+ if (It == InstEnumValueByName.end())
+ return false;
+ Opc = It->getValue();
+ return true;
+ }
+};
+
+} // end unnamed namespace
+
+namespace MCModuleYAML {
+
+LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum)
+
+struct Operand {
+ MCOperand MCOp;
+};
+
+struct Inst {
+ OpcodeEnum Opcode;
+ std::vector<Operand> Operands;
+ uint64_t Size;
+};
+
+struct Atom {
+ MCAtom::AtomKind Type;
+ yaml::Hex64 StartAddress;
+ uint64_t Size;
+
+ std::vector<Inst> Insts;
+ object::yaml::BinaryRef Data;
+};
+
+struct BasicBlock {
+ yaml::Hex64 Address;
+ std::vector<yaml::Hex64> Preds;
+ std::vector<yaml::Hex64> Succs;
+};
+
+struct Function {
+ StringRef Name;
+ std::vector<BasicBlock> BasicBlocks;
+};
+
+struct Module {
+ std::vector<Atom> Atoms;
+ std::vector<Function> Functions;
+};
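+
+// Serialized through the traits below, a module looks roughly like the
+// following sketch. The opcode mnemonic ("RETQ") and function name ("main")
+// are purely illustrative and target-specific; operands are written as
+// R<regname> or I<immediate>.
+//
+//   Atoms:
+//     - StartAddress: 0x0
+//       Size:         1
+//       Type:         Text
+//       Content:
+//         - Inst: RETQ
+//           Size: 1
+//           Ops:  [ ]
+//   Functions:
+//     - Name:        main
+//       BasicBlocks:
+//         - Address: 0x0
+//           Preds:   [ ]
+//           Succs:   [ ]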
+
+} // end namespace MCModuleYAML
+} // end namespace llvm
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function)
+
+namespace llvm {
+
+namespace yaml {
+
+template <> struct ScalarEnumerationTraits<MCAtom::AtomKind> {
+ static void enumeration(IO &IO, MCAtom::AtomKind &Kind);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Atom> {
+ static void mapping(IO &IO, MCModuleYAML::Atom &A);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Inst> {
+ static void mapping(IO &IO, MCModuleYAML::Inst &I);
+};
+
+template <> struct MappingTraits<MCModuleYAML::BasicBlock> {
+ static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Function> {
+ static void mapping(IO &IO, MCModuleYAML::Function &Fn);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Module> {
+ static void mapping(IO &IO, MCModuleYAML::Module &M);
+};
+
+template <> struct ScalarTraits<MCModuleYAML::Operand> {
+ static void output(const MCModuleYAML::Operand &, void *,
+ llvm::raw_ostream &);
+ static StringRef input(StringRef, void *, MCModuleYAML::Operand &);
+};
+
+template <> struct ScalarTraits<MCModuleYAML::OpcodeEnum> {
+ static void output(const MCModuleYAML::OpcodeEnum &, void *,
+ llvm::raw_ostream &);
+ static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &);
+};
+
+void ScalarEnumerationTraits<MCAtom::AtomKind>::enumeration(
+ IO &IO, MCAtom::AtomKind &Value) {
+ IO.enumCase(Value, "Text", MCAtom::TextAtom);
+ IO.enumCase(Value, "Data", MCAtom::DataAtom);
+}
+
+void MappingTraits<MCModuleYAML::Atom>::mapping(IO &IO, MCModuleYAML::Atom &A) {
+ IO.mapRequired("StartAddress", A.StartAddress);
+ IO.mapRequired("Size", A.Size);
+ IO.mapRequired("Type", A.Type);
+ if (A.Type == MCAtom::TextAtom)
+ IO.mapRequired("Content", A.Insts);
+ else if (A.Type == MCAtom::DataAtom)
+ IO.mapRequired("Content", A.Data);
+}
+
+void MappingTraits<MCModuleYAML::Inst>::mapping(IO &IO, MCModuleYAML::Inst &I) {
+ IO.mapRequired("Inst", I.Opcode);
+ IO.mapRequired("Size", I.Size);
+ IO.mapRequired("Ops", I.Operands);
+}
+
+void
+MappingTraits<MCModuleYAML::BasicBlock>::mapping(IO &IO,
+ MCModuleYAML::BasicBlock &BB) {
+ IO.mapRequired("Address", BB.Address);
+ IO.mapRequired("Preds", BB.Preds);
+ IO.mapRequired("Succs", BB.Succs);
+}
+
+void MappingTraits<MCModuleYAML::Function>::mapping(IO &IO,
+ MCModuleYAML::Function &F) {
+ IO.mapRequired("Name", F.Name);
+ IO.mapRequired("BasicBlocks", F.BasicBlocks);
+}
+
+void MappingTraits<MCModuleYAML::Module>::mapping(IO &IO,
+ MCModuleYAML::Module &M) {
+ IO.mapRequired("Atoms", M.Atoms);
+ IO.mapOptional("Functions", M.Functions);
+}
+
+void
+ScalarTraits<MCModuleYAML::Operand>::output(const MCModuleYAML::Operand &Val,
+ void *Ctx, raw_ostream &Out) {
+ InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
+
+ // FIXME: Doesn't support FPImm and expr/inst, but do these make sense?
+ if (Val.MCOp.isImm())
+ Out << "I" << Val.MCOp.getImm();
+ else if (Val.MCOp.isReg())
+ Out << "R" << IRI->MRI.getName(Val.MCOp.getReg());
+ else
+ llvm_unreachable("Trying to output invalid MCOperand!");
+}
+
+StringRef
+ScalarTraits<MCModuleYAML::Operand>::input(StringRef Scalar, void *Ctx,
+ MCModuleYAML::Operand &Val) {
+ InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
+ char Type = 0;
+ if (Scalar.size() >= 1)
+ Type = Scalar.front();
+ if (Type != 'R' && Type != 'I')
+ return "Operand must start with 'R' (register) or 'I' (immediate).";
+ if (Type == 'R') {
+ unsigned Reg;
+ if (!IRI->matchRegister(Scalar.substr(1), Reg))
+ return "Invalid register name.";
+ Val.MCOp = MCOperand::CreateReg(Reg);
+ } else if (Type == 'I') {
+ int64_t RIVal;
+ if (Scalar.substr(1).getAsInteger(10, RIVal))
+ return "Invalid immediate value.";
+ Val.MCOp = MCOperand::CreateImm(RIVal);
+ } else {
+ Val.MCOp = MCOperand();
+ }
+ return StringRef();
+}
+
+void ScalarTraits<MCModuleYAML::OpcodeEnum>::output(
+ const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) {
+ InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
+ Out << IRI->MII.getName(Val);
+}
+
+StringRef
+ScalarTraits<MCModuleYAML::OpcodeEnum>::input(StringRef Scalar, void *Ctx,
+ MCModuleYAML::OpcodeEnum &Val) {
+ InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
+ unsigned Opc;
+ if (!IRI->matchOpcode(Scalar, Opc))
+ return "Invalid instruction opcode.";
+ Val = Opc;
+ return "";
+}
+
+} // end namespace yaml
+
+namespace {
+
+class MCModule2YAML {
+ const MCModule &MCM;
+ MCModuleYAML::Module YAMLModule;
+ void dumpAtom(const MCAtom *MCA);
+ void dumpFunction(const MCFunction *MCF);
+ void dumpBasicBlock(const MCBasicBlock *MCBB);
+
+public:
+ MCModule2YAML(const MCModule &MCM);
+ MCModuleYAML::Module &getYAMLModule();
+};
+
+class YAML2MCModule {
+ MCModule &MCM;
+
+public:
+ YAML2MCModule(MCModule &MCM);
+ StringRef parse(const MCModuleYAML::Module &YAMLModule);
+};
+
+} // end unnamed namespace
+
+MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() {
+ for (MCModule::const_atom_iterator AI = MCM.atom_begin(), AE = MCM.atom_end();
+ AI != AE; ++AI)
+ dumpAtom(*AI);
+ for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end();
+ FI != FE; ++FI)
+ dumpFunction(*FI);
+}
+
+void MCModule2YAML::dumpAtom(const MCAtom *MCA) {
+ YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1);
+ MCModuleYAML::Atom &A = YAMLModule.Atoms.back();
+ A.Type = MCA->getKind();
+ A.StartAddress = MCA->getBeginAddr();
+ A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1;
+ if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(MCA)) {
+ const size_t InstCount = TA->size();
+ A.Insts.resize(InstCount);
+ for (size_t i = 0; i != InstCount; ++i) {
+ const MCDecodedInst &MCDI = TA->at(i);
+ A.Insts[i].Opcode = MCDI.Inst.getOpcode();
+ A.Insts[i].Size = MCDI.Size;
+ const unsigned OpCount = MCDI.Inst.getNumOperands();
+ A.Insts[i].Operands.resize(OpCount);
+ for (unsigned oi = 0; oi != OpCount; ++oi)
+ A.Insts[i].Operands[oi].MCOp = MCDI.Inst.getOperand(oi);
+ }
+ } else if (const MCDataAtom *DA = dyn_cast<MCDataAtom>(MCA)) {
+ A.Data = DA->getData();
+ } else {
+ llvm_unreachable("Unknown atom type.");
+ }
+}
+
+void MCModule2YAML::dumpFunction(const MCFunction *MCF) {
+ YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1);
+ MCModuleYAML::Function &F = YAMLModule.Functions.back();
+ F.Name = MCF->getName();
+ for (MCFunction::const_iterator BBI = MCF->begin(), BBE = MCF->end();
+ BBI != BBE; ++BBI) {
+ const MCBasicBlock *MCBB = *BBI;
+ F.BasicBlocks.resize(F.BasicBlocks.size() + 1);
+ MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back();
+ BB.Address = MCBB->getInsts()->getBeginAddr();
+ for (MCBasicBlock::pred_const_iterator PI = MCBB->pred_begin(),
+ PE = MCBB->pred_end();
+ PI != PE; ++PI)
+ BB.Preds.push_back((*PI)->getInsts()->getBeginAddr());
+ for (MCBasicBlock::succ_const_iterator SI = MCBB->succ_begin(),
+ SE = MCBB->succ_end();
+ SI != SE; ++SI)
+ BB.Succs.push_back((*SI)->getInsts()->getBeginAddr());
+ }
+}
+
+MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; }
+
+YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {}
+
+StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) {
+ typedef std::vector<MCModuleYAML::Atom>::const_iterator AtomIt;
+ typedef std::vector<MCModuleYAML::Inst>::const_iterator InstIt;
+ typedef std::vector<MCModuleYAML::Operand>::const_iterator OpIt;
+
+ typedef DenseMap<uint64_t, MCTextAtom *> AddrToTextAtomTy;
+ AddrToTextAtomTy TAByAddr;
+
+ for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end();
+ AI != AE; ++AI) {
+ uint64_t StartAddress = AI->StartAddress;
+ if (AI->Size == 0)
+ return "Atoms can't be empty!";
+ uint64_t EndAddress = StartAddress + AI->Size - 1;
+ switch (AI->Type) {
+ case MCAtom::TextAtom: {
+ MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress);
+ TAByAddr[StartAddress] = TA;
+ for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE;
+ ++II) {
+ MCInst MI;
+ MI.setOpcode(II->Opcode);
+ for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE;
+ ++OI)
+ MI.addOperand(OI->MCOp);
+ TA->addInst(MI, II->Size);
+ }
+ break;
+ }
+ case MCAtom::DataAtom: {
+ MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress);
+ SmallVector<char, 64> Data;
+ raw_svector_ostream OS(Data);
+ AI->Data.writeAsBinary(OS);
+ OS.flush();
+ for (size_t i = 0, e = Data.size(); i != e; ++i)
+ DA->addData((uint8_t)Data[i]);
+ break;
+ }
+ }
+ }
+
+ typedef std::vector<MCModuleYAML::Function>::const_iterator FuncIt;
+ typedef std::vector<MCModuleYAML::BasicBlock>::const_iterator BBIt;
+ typedef std::vector<yaml::Hex64>::const_iterator AddrIt;
+ for (FuncIt FI = YAMLModule.Functions.begin(),
+ FE = YAMLModule.Functions.end();
+ FI != FE; ++FI) {
+ MCFunction *MCFN = MCM.createFunction(FI->Name);
+ for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
+ BBI != BBE; ++BBI) {
+ AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address);
+ if (It == TAByAddr.end())
+ return "Basic block start address doesn't match any text atom!";
+ MCFN->createBlock(*It->second);
+ }
+ for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
+ BBI != BBE; ++BBI) {
+ MCBasicBlock *MCBB = MCFN->find(BBI->Address);
+ if (!MCBB)
+ return "Couldn't find matching basic block in function.";
+ for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE;
+ ++PI) {
+ MCBasicBlock *Pred = MCFN->find(*PI);
+ if (!Pred)
+ return "Couldn't find predecessor basic block.";
+ MCBB->addPredecessor(Pred);
+ }
+ for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE;
+ ++SI) {
+ MCBasicBlock *Succ = MCFN->find(*SI);
+ if (!Succ)
+          return "Couldn't find successor basic block.";
+ MCBB->addSuccessor(Succ);
+ }
+ }
+ }
+ return "";
+}
+
+StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM,
+ const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
+ MCModule2YAML Dumper(MCM);
+ InstrRegInfoHolder IRI(MII, MRI);
+ yaml::Output YOut(OS, (void *)&IRI);
+ YOut << Dumper.getYAMLModule();
+ return "";
+}
+
+StringRef yaml2mcmodule(OwningPtr<MCModule> &MCM, StringRef YamlContent,
+ const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
+ MCM.reset(new MCModule);
+ YAML2MCModule Parser(*MCM);
+ MCModuleYAML::Module YAMLModule;
+ InstrRegInfoHolder IRI(MII, MRI);
+ yaml::Input YIn(YamlContent, (void *)&IRI);
+ YIn >> YAMLModule;
+ if (error_code ec = YIn.error())
+ return ec.message();
+ StringRef err = Parser.parse(YAMLModule);
+ if (!err.empty())
+ return err;
+ return "";
+}
+
+} // end namespace llvm
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 530c646..9b9c4aa 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -19,7 +19,7 @@ namespace {
class MCNullStreamer : public MCStreamer {
public:
- MCNullStreamer(MCContext &Context) : MCStreamer(SK_NullStreamer, Context) {}
+ MCNullStreamer(MCContext &Context) : MCStreamer(Context, 0) {}
/// @name MCStreamer Interface
/// @{
@@ -37,7 +37,7 @@ namespace {
virtual void EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(getCurrentSection().first &&"Cannot emit before setting section!");
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
}
virtual void EmitDebugLabel(MCSymbol *Symbol) {
EmitLabel(Symbol);
@@ -52,7 +52,9 @@ namespace {
const MCSymbol *Label,
unsigned PointerSize) {}
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
+ virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){
+ return true;
+ }
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
@@ -107,13 +109,6 @@ namespace {
virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
RecordProcEnd(Frame);
}
-
- /// @}
-
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_NullStreamer;
- }
-
};
}
diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp
index 1ea6eed..16a110f 100644
--- a/lib/MC/MCObjectDisassembler.cpp
+++ b/lib/MC/MCObjectDisassembler.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCObjectDisassembler.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -18,12 +18,15 @@
#include "llvm/MC/MCFunction.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCObjectSymbolizer.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/StringRefMemoryObject.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
-#include <set>
using namespace llvm;
using namespace object;
@@ -31,10 +34,55 @@ using namespace object;
MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
const MCDisassembler &Dis,
const MCInstrAnalysis &MIA)
- : Obj(Obj), Dis(Dis), MIA(MIA) {}
+ : Obj(Obj), Dis(Dis), MIA(MIA), MOS(0) {}
-MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
+uint64_t MCObjectDisassembler::getEntrypoint() {
+ error_code ec;
+ for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
+ SI != SE; SI.increment(ec)) {
+ if (ec)
+ break;
+ StringRef Name;
+ SI->getName(Name);
+ if (Name == "main" || Name == "_main") {
+ uint64_t Entrypoint;
+ SI->getAddress(Entrypoint);
+ return getEffectiveLoadAddr(Entrypoint);
+ }
+ }
+ return 0;
+}
+
+ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() {
+ return ArrayRef<uint64_t>();
+}
+
+ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
+ return ArrayRef<uint64_t>();
+}
+
+MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) {
+ // FIXME: Keep track of object sections.
+ return FallbackRegion.get();
+}
+
+uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
+ return Addr;
+}
+
+uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) {
+ return Addr;
+}
+
+MCModule *MCObjectDisassembler::buildEmptyModule() {
MCModule *Module = new MCModule;
+ Module->Entrypoint = getEntrypoint();
+ return Module;
+}
+
+MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
+ MCModule *Module = buildEmptyModule();
+
buildSectionAtoms(Module);
if (withCFG)
buildCFG(Module);
@@ -58,9 +106,10 @@ void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
uint64_t SecSize; SI->getSize(SecSize);
if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
continue;
+ StartAddr = getEffectiveLoadAddr(StartAddr);
StringRef Contents; SI->getContents(Contents);
- StringRefMemoryObject memoryObject(Contents);
+ StringRefMemoryObject memoryObject(Contents, StartAddr);
// We don't care about things like non-file-backed sections yet.
if (Contents.size() != SecSize || !SecSize)
@@ -70,19 +119,31 @@ void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
StringRef SecName; SI->getName(SecName);
if (isText) {
- MCTextAtom *Text = Module->createTextAtom(StartAddr, EndAddr);
- Text->setName(SecName);
+ MCTextAtom *Text = 0;
+ MCDataAtom *InvalidData = 0;
+
uint64_t InstSize;
for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
+ const uint64_t CurAddr = StartAddr + Index;
MCInst Inst;
- if (Dis.getInstruction(Inst, InstSize, memoryObject, Index,
- nulls(), nulls()))
+ if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(),
+ nulls())) {
+ if (!Text) {
+ Text = Module->createTextAtom(CurAddr, CurAddr);
+ Text->setName(SecName);
+ }
Text->addInst(Inst, InstSize);
- else
- // We don't care about splitting mixed atoms either.
- llvm_unreachable("Couldn't disassemble instruction in atom.");
+ InvalidData = 0;
+ } else {
+ assert(InstSize && "getInstruction() consumed no bytes");
+ if (!InvalidData) {
+ Text = 0;
+ InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1);
+ }
+ for (uint64_t I = 0; I < InstSize; ++I)
+ InvalidData->addData(Contents[Index+I]);
+ }
}
-
} else {
MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
Data->setName(SecName);
@@ -94,13 +155,16 @@ void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
namespace {
struct BBInfo;
- typedef std::set<BBInfo*> BBInfoSetTy;
+ typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy;
struct BBInfo {
MCTextAtom *Atom;
MCBasicBlock *BB;
BBInfoSetTy Succs;
BBInfoSetTy Preds;
+ MCObjectDisassembler::AddressSetTy SuccAddrs;
+
+ BBInfo() : Atom(0), BB(0) {}
void addSucc(BBInfo &Succ) {
Succs.insert(&Succ);
@@ -109,13 +173,33 @@ namespace {
};
}
+static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) {
+ std::sort(V.begin(), V.end());
+ V.erase(std::unique(V.begin(), V.end()), V.end());
+}
+
void MCObjectDisassembler::buildCFG(MCModule *Module) {
typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
BBInfoByAddrTy BBInfos;
- typedef std::set<uint64_t> AddressSetTy;
AddressSetTy Splits;
AddressSetTy Calls;
+ error_code ec;
+ for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
+ SI != SE; SI.increment(ec)) {
+ if (ec)
+ break;
+ SymbolRef::Type SymType;
+ SI->getType(SymType);
+ if (SymType == SymbolRef::ST_Function) {
+ uint64_t SymAddr;
+ SI->getAddress(SymAddr);
+ SymAddr = getEffectiveLoadAddr(SymAddr);
+ Calls.push_back(SymAddr);
+ Splits.push_back(SymAddr);
+ }
+ }
+
assert(Module->func_begin() == Module->func_end()
&& "Module already has a CFG!");
@@ -125,21 +209,24 @@ void MCObjectDisassembler::buildCFG(MCModule *Module) {
AI != AE; ++AI) {
MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
if (!TA) continue;
- Calls.insert(TA->getBeginAddr());
+ Calls.push_back(TA->getBeginAddr());
BBInfos[TA->getBeginAddr()].Atom = TA;
for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
II != IE; ++II) {
if (MIA.isTerminator(II->Inst))
- Splits.insert(II->Address + II->Size);
+ Splits.push_back(II->Address + II->Size);
uint64_t Target;
if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
if (MIA.isCall(II->Inst))
- Calls.insert(Target);
- Splits.insert(Target);
+ Calls.push_back(Target);
+ Splits.push_back(Target);
}
}
}
+ RemoveDupsFromAddressVector(Splits);
+ RemoveDupsFromAddressVector(Calls);
+
// Split text atoms into basic block atoms.
for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
SI != SE; ++SI) {
@@ -185,8 +272,8 @@ void MCObjectDisassembler::buildCFG(MCModule *Module) {
// Create MCBBs.
SmallSetVector<BBInfo*, 16> Worklist;
Worklist.insert(&BBI);
- for (size_t WI = 0; WI < Worklist.size(); ++WI) {
- BBInfo *BBI = Worklist[WI];
+ for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+ BBInfo *BBI = Worklist[wi];
if (!BBI->Atom)
continue;
BBI->BB = &MCFN.createBlock(*BBI->Atom);
@@ -200,17 +287,298 @@ void MCObjectDisassembler::buildCFG(MCModule *Module) {
}
// Set preds/succs.
- for (size_t WI = 0; WI < Worklist.size(); ++WI) {
- BBInfo *BBI = Worklist[WI];
+ for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+ BBInfo *BBI = Worklist[wi];
MCBasicBlock *MCBB = BBI->BB;
if (!MCBB)
continue;
for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
- SI != SE; ++SI)
- MCBB->addSuccessor((*SI)->BB);
+ SI != SE; ++SI)
+ if ((*SI)->BB)
+ MCBB->addSuccessor((*SI)->BB);
for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
- PI != PE; ++PI)
- MCBB->addPredecessor((*PI)->BB);
+ PI != PE; ++PI)
+ if ((*PI)->BB)
+ MCBB->addPredecessor((*PI)->BB);
+ }
+ }
+}
+
+// Basic idea of the disassembly + discovery:
+//
+// start with the wanted address, insert it in the worklist
+// while worklist not empty, take next address in the worklist:
+// - check if atom exists there
+// - if middle of atom:
+// - split basic blocks referencing the atom
+// - look for an already encountered BBInfo (using a map<atom, bbinfo>)
+// - if there is, split it (new one, fallthrough, move succs, etc..)
+// - if start of atom: nothing else to do
+// - if no atom: create new atom and new bbinfo
+// - look at the last instruction in the atom, add succs to worklist
+// for all elements in the worklist:
+// - create basic block, update preds/succs, etc..
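+//
+// Concretely: a request for a block in the middle of an already-discovered
+// text atom splits that atom at the requested address, hands the original
+// block's successors to the new block, and leaves the original block with a
+// single fallthrough successor to the split point.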
+//
+MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN,
+ uint64_t BBBeginAddr,
+ AddressSetTy &CallTargets,
+ AddressSetTy &TailCallTargets) {
+ typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+ typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;
+ BBInfoByAddrTy BBInfos;
+ AddrWorklistTy Worklist;
+
+ Worklist.insert(BBBeginAddr);
+ for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+ const uint64_t BeginAddr = Worklist[wi];
+ BBInfo *BBI = &BBInfos[BeginAddr];
+
+ MCTextAtom *&TA = BBI->Atom;
+ assert(!TA && "Discovered basic block already has an associated atom!");
+
+ // Look for an atom at BeginAddr.
+ if (MCAtom *A = Module->findAtomContaining(BeginAddr)) {
+ // FIXME: We don't care about mixed atoms, see above.
+ TA = cast<MCTextAtom>(A);
+
+ // The found atom doesn't begin at BeginAddr, we have to split it.
+ if (TA->getBeginAddr() != BeginAddr) {
+ // FIXME: Handle overlapping atoms: middle-starting instructions, etc..
+ MCTextAtom *NewTA = TA->split(BeginAddr);
+
+ // Look for an already encountered basic block that needs splitting
+ BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr());
+ if (It != BBInfos.end() && It->second.Atom) {
+ BBI->SuccAddrs = It->second.SuccAddrs;
+ It->second.SuccAddrs.clear();
+ It->second.SuccAddrs.push_back(BeginAddr);
+ }
+ TA = NewTA;
+ }
+ BBI->Atom = TA;
+ } else {
+ // If we didn't find an atom, then we have to disassemble to create one!
+
+ MemoryObject *Region = getRegionFor(BeginAddr);
+ if (!Region)
+ llvm_unreachable(("Couldn't find suitable region for disassembly at " +
+ utostr(BeginAddr)).c_str());
+
+ uint64_t InstSize;
+ uint64_t EndAddr = Region->getBase() + Region->getExtent();
+
+ // We want to stop before the next atom and have a fallthrough to it.
+ if (MCTextAtom *NextAtom =
+ cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr)))
+ EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());
+
+ for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
+ MCInst Inst;
+ if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(),
+ nulls())) {
+ if (!TA)
+ TA = Module->createTextAtom(Addr, Addr);
+ TA->addInst(Inst, InstSize);
+ } else {
+ // We don't care about splitting mixed atoms either.
+ llvm_unreachable("Couldn't disassemble instruction in atom.");
+ }
+
+ uint64_t BranchTarget;
+ if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) {
+ if (MIA.isCall(Inst))
+ CallTargets.push_back(BranchTarget);
+ }
+
+ if (MIA.isTerminator(Inst))
+ break;
+ }
+ BBI->Atom = TA;
+ }
+
+ assert(TA && "Couldn't disassemble atom, none was created!");
+ assert(TA->begin() != TA->end() && "Empty atom!");
+
+ MemoryObject *Region = getRegionFor(TA->getBeginAddr());
+ assert(Region && "Couldn't find region for already disassembled code!");
+ uint64_t EndRegion = Region->getBase() + Region->getExtent();
+
+ // Now we have a basic block atom, add successors.
+ // Add the fallthrough block.
+ if ((MIA.isConditionalBranch(TA->back().Inst) ||
+ !MIA.isTerminator(TA->back().Inst)) &&
+ (TA->getEndAddr() + 1 < EndRegion)) {
+ BBI->SuccAddrs.push_back(TA->getEndAddr() + 1);
+ Worklist.insert(TA->getEndAddr() + 1);
+ }
+
+ // If the terminator is a branch, add the target block.
+ if (MIA.isBranch(TA->back().Inst)) {
+ uint64_t BranchTarget;
+ if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address,
+ TA->back().Size, BranchTarget)) {
+ StringRef ExtFnName;
+ if (MOS)
+ ExtFnName =
+ MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget));
+ if (!ExtFnName.empty()) {
+ TailCallTargets.push_back(BranchTarget);
+ CallTargets.push_back(BranchTarget);
+ } else {
+ BBI->SuccAddrs.push_back(BranchTarget);
+ Worklist.insert(BranchTarget);
+ }
+ }
+ }
+ }
+
+ for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
+ const uint64_t BeginAddr = Worklist[wi];
+ BBInfo *BBI = &BBInfos[BeginAddr];
+
+ assert(BBI->Atom && "Found a basic block without an associated atom!");
+
+ // Look for a basic block at BeginAddr.
+ BBI->BB = MCFN->find(BeginAddr);
+ if (BBI->BB) {
+ // FIXME: check that the succs/preds are the same
+ continue;
+ }
+ // If there was none, we have to create one from the atom.
+ BBI->BB = &MCFN->createBlock(*BBI->Atom);
+ }
+
+ for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
+ const uint64_t BeginAddr = Worklist[wi];
+ BBInfo *BBI = &BBInfos[BeginAddr];
+ MCBasicBlock *BB = BBI->BB;
+
+ RemoveDupsFromAddressVector(BBI->SuccAddrs);
+ for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
+ SE = BBI->SuccAddrs.end();
+         SI != SE; ++SI) {
+ MCBasicBlock *Succ = BBInfos[*SI].BB;
+ BB->addSuccessor(Succ);
+ Succ->addPredecessor(BB);
+ }
+ }
+
+ assert(BBInfos[Worklist[0]].BB &&
+ "No basic block created at requested address?");
+
+ return BBInfos[Worklist[0]].BB;
+}
+
+MCFunction *
+MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr,
+ AddressSetTy &CallTargets,
+ AddressSetTy &TailCallTargets) {
+ // First, check if this is an external function.
+ StringRef ExtFnName;
+ if (MOS)
+ ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr));
+ if (!ExtFnName.empty())
+ return Module->createFunction(ExtFnName);
+
+ // If it's not, look for an existing function.
+ for (MCModule::func_iterator FI = Module->func_begin(),
+ FE = Module->func_end();
+ FI != FE; ++FI) {
+ if ((*FI)->empty())
+ continue;
+ // FIXME: MCModule should provide a findFunctionByAddr()
+ if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr)
+ return *FI;
+ }
+
+ // Finally, just create a new one.
+ MCFunction *MCFN = Module->createFunction("");
+ getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets);
+ return MCFN;
+}
+
+// MachO MCObjectDisassembler implementation.
+
+MCMachOObjectDisassembler::MCMachOObjectDisassembler(
+ const MachOObjectFile &MOOF, const MCDisassembler &Dis,
+ const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
+ uint64_t HeaderLoadAddress)
+ : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF),
+ VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {
+
+ error_code ec;
+ for (section_iterator SI = MOOF.begin_sections(), SE = MOOF.end_sections();
+ SI != SE; SI.increment(ec)) {
+ if (ec)
+ break;
+ StringRef Name;
+ SI->getName(Name);
+ // FIXME: We should use the S_ section type instead of the name.
+ if (Name == "__mod_init_func") {
+ DEBUG(dbgs() << "Found __mod_init_func section!\n");
+ SI->getContents(ModInitContents);
+ } else if (Name == "__mod_exit_func") {
+ DEBUG(dbgs() << "Found __mod_exit_func section!\n");
+ SI->getContents(ModExitContents);
+ }
+ }
+}
+
+// FIXME: Only do the translations for addresses actually inside the object.
+uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
+ return Addr + VMAddrSlide;
+}
+
+uint64_t
+MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) {
+ return EffectiveAddr - VMAddrSlide;
+}
+
+uint64_t MCMachOObjectDisassembler::getEntrypoint() {
+ uint64_t EntryFileOffset = 0;
+
+ // Look for LC_MAIN.
+ {
+ uint32_t LoadCommandCount = MOOF.getHeader().ncmds;
+ MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo();
+ for (unsigned I = 0;; ++I) {
+ if (Load.C.cmd == MachO::LC_MAIN) {
+ EntryFileOffset =
+ ((const MachO::entry_point_command *)Load.Ptr)->entryoff;
+ break;
+ }
+
+ if (I == LoadCommandCount - 1)
+ break;
+ else
+ Load = MOOF.getNextLoadCommandInfo(Load);
}
}
+
+ // If we didn't find anything, default to the common implementation.
+ // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends?
+  if (!EntryFileOffset)
+ return MCObjectDisassembler::getEntrypoint();
+
+ return EntryFileOffset + HeaderLoadAddress;
+}
+
+ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() {
+ // FIXME: We only handle 64bit mach-o
+ assert(MOOF.is64Bit());
+
+ size_t EntrySize = 8;
+ size_t EntryCount = ModInitContents.size() / EntrySize;
+ return ArrayRef<uint64_t>(
+ reinterpret_cast<const uint64_t *>(ModInitContents.data()), EntryCount);
+}
+
+ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() {
+ // FIXME: We only handle 64bit mach-o
+ assert(MOOF.is64Bit());
+
+ size_t EntrySize = 8;
+ size_t EntryCount = ModExitContents.size() / EntrySize;
+ return ArrayRef<uint64_t>(
+ reinterpret_cast<const uint64_t *>(ModExitContents.data()), EntryCount);
}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index bcf52d2..8ef4a0a 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -39,6 +39,9 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
= Ctx->getMachOSection("__DATA", "__data", 0,
SectionKind::getDataRel());
+  // BSSSection might otherwise be left uninitialized on MSVC, so clear it.
+ BSSSection = 0;
+
TLSDataSection // .tdata
= Ctx->getMachOSection("__DATA", "__thread_data",
MCSectionMachO::S_THREAD_LOCAL_REGULAR,
@@ -199,6 +202,14 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
Ctx->getMachOSection("__DWARF", "__debug_pubtypes",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
+ DwarfGnuPubNamesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_gnu_pubn",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfGnuPubTypesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_gnu_pubt",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfStrSection =
Ctx->getMachOSection("__DWARF", "__debug_str",
MCSectionMachO::S_ATTR_DEBUG,
@@ -223,6 +234,9 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
Ctx->getMachOSection("__DWARF", "__debug_inlined",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
+ StackMapSection =
+ Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", 0,
+ SectionKind::getMetadata());
TLSExtraDataSection = TLSTLVSection;
}
@@ -435,6 +449,12 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfPubTypesSection =
Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
+ DwarfGnuPubNamesSection =
+ Ctx->getELFSection(".debug_gnu_pubnames", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfGnuPubTypesSection =
+ Ctx->getELFSection(".debug_gnu_pubtypes", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfStrSection =
Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS,
ELF::SHF_MERGE | ELF::SHF_STRINGS,
@@ -496,6 +516,12 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
// COFF
+ BSSSection =
+ Ctx->getCOFFSection(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
TextSection =
Ctx->getCOFFSection(".text",
COFF::IMAGE_SCN_CNT_CODE |
@@ -585,6 +611,16 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
+ DwarfGnuPubNamesSection =
+ Ctx->getCOFFSection(".debug_gnu_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfGnuPubTypesSection =
+ Ctx->getCOFFSection(".debug_gnu_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfStrSection =
Ctx->getCOFFSection(".debug_str",
COFF::IMAGE_SCN_MEM_DISCARDABLE |
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 36a923a..bc14c2a 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -22,19 +22,22 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context,
+MCObjectStreamer::MCObjectStreamer(MCContext &Context,
+ MCTargetStreamer *TargetStreamer,
MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter_)
- : MCStreamer(Kind, Context),
+ : MCStreamer(Context, TargetStreamer),
Assembler(new MCAssembler(Context, TAB, *Emitter_,
*TAB.createObjectWriter(OS), OS)),
CurSectionData(0) {}
-MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context,
+MCObjectStreamer::MCObjectStreamer(MCContext &Context,
+ MCTargetStreamer *TargetStreamer,
MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter_,
MCAssembler *_Assembler)
- : MCStreamer(Kind, Context), Assembler(_Assembler), CurSectionData(0) {}
+ : MCStreamer(Context, TargetStreamer), Assembler(_Assembler),
+ CurSectionData(0) {}
MCObjectStreamer::~MCObjectStreamer() {
delete &Assembler->getBackend();
@@ -302,6 +305,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
}
void MCObjectStreamer::EmitBytes(StringRef Data) {
+ MCLineEntry::Make(this, getCurrentSection().first);
getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
diff --git a/lib/MC/MCObjectSymbolizer.cpp b/lib/MC/MCObjectSymbolizer.cpp
index aa7648e..b9131d1 100644
--- a/lib/MC/MCObjectSymbolizer.cpp
+++ b/lib/MC/MCObjectSymbolizer.cpp
@@ -15,7 +15,7 @@
#include "llvm/MC/MCRelocationInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/MachO.h"
-#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -26,100 +26,127 @@ using namespace object;
namespace {
class MCMachObjectSymbolizer : public MCObjectSymbolizer {
+ const MachOObjectFile *MOOF;
+ // __TEXT;__stubs support.
+ uint64_t StubsStart;
+ uint64_t StubsCount;
+ uint64_t StubSize;
+ uint64_t StubsIndSymIndex;
+
public:
MCMachObjectSymbolizer(MCContext &Ctx, OwningPtr<MCRelocationInfo> &RelInfo,
- const object::MachOObjectFile *MachOOF)
- : MCObjectSymbolizer(Ctx, RelInfo, MachOOF)
- {}
+ const MachOObjectFile *MOOF);
+
+ StringRef findExternalFunctionAt(uint64_t Addr) LLVM_OVERRIDE;
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
- int64_t Value, uint64_t Address) {
- AddrToRelocMap::iterator RI = AddrToReloc.find(Address);
- if (RI != AddrToReloc.end()) {
- const MCExpr *RelExpr = RelInfo->createExprForRelocation(RI->second);
- if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false)
- return;
- }
- uint64_t Addr = Value;
- SortedSectionList::const_iterator SI = findSectionContaining(Addr);
- if (SI != SortedSections.end()) {
- const SectionRef &S = *SI;
- StringRef Name; S.getName(Name);
- uint64_t SAddr; S.getAddress(SAddr);
- if (Name == "__cstring") {
- StringRef Contents;
- S.getContents(Contents);
- Contents = Contents.substr(Addr - SAddr);
- cStream << " ## literal pool for: "
- << Contents.substr(0, Contents.find_first_of(0));
- }
- }
- }
+ int64_t Value,
+ uint64_t Address) LLVM_OVERRIDE;
};
} // End unnamed namespace
-//===- MCObjectSymbolizer -------------------------------------------------===//
-MCObjectSymbolizer::MCObjectSymbolizer(MCContext &Ctx,
- OwningPtr<MCRelocationInfo> &RelInfo,
- const ObjectFile *Obj)
- : MCSymbolizer(Ctx, RelInfo), Obj(Obj), SortedSections(), AddrToReloc() {
+MCMachObjectSymbolizer::
+MCMachObjectSymbolizer(MCContext &Ctx, OwningPtr<MCRelocationInfo> &RelInfo,
+ const MachOObjectFile *MOOF)
+ : MCObjectSymbolizer(Ctx, RelInfo, MOOF), MOOF(MOOF),
+ StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) {
+
error_code ec;
- for (section_iterator SI = Obj->begin_sections(),
- SE = Obj->end_sections();
- SI != SE;
- SI.increment(ec)) {
+ for (section_iterator SI = MOOF->begin_sections(), SE = MOOF->end_sections();
+ SI != SE; SI.increment(ec)) {
if (ec) break;
-
- section_iterator RelSecI = SI->getRelocatedSection();
- if (RelSecI == Obj->end_sections())
- continue;
-
- uint64_t StartAddr; RelSecI->getAddress(StartAddr);
- uint64_t Size; RelSecI->getSize(Size);
- bool RequiredForExec; RelSecI->isRequiredForExecution(RequiredForExec);
- if (RequiredForExec == false || Size == 0)
- continue;
- insertSection(*SI);
- for (relocation_iterator RI = SI->begin_relocations(),
- RE = SI->end_relocations();
- RI != RE;
- RI.increment(ec)) {
- if (ec) break;
- // FIXME: libObject is inconsistent regarding error handling. The
- // overwhelming majority of methods always return object_error::success,
- // and assert for simple errors.. Here, ELFObjectFile::getRelocationOffset
- // asserts when the file type isn't ET_REL.
- // This workaround handles x86-64 elf, the only one that has a relocinfo.
- uint64_t Offset;
- if (Obj->isELF()) {
- const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj);
- if (ELFObj == 0)
- break;
- if (ELFObj->getElfHeader()->e_type == ELF::ET_REL) {
- RI->getOffset(Offset);
- Offset += StartAddr;
- } else {
- RI->getAddress(Offset);
- }
+ StringRef Name; SI->getName(Name);
+ if (Name == "__stubs") {
+ SectionRef StubsSec = *SI;
+ if (MOOF->is64Bit()) {
+ MachO::section_64 S = MOOF->getSection64(StubsSec.getRawDataRefImpl());
+ StubsIndSymIndex = S.reserved1;
+ StubSize = S.reserved2;
} else {
- RI->getOffset(Offset);
- Offset += StartAddr;
+ MachO::section S = MOOF->getSection(StubsSec.getRawDataRefImpl());
+ StubsIndSymIndex = S.reserved1;
+ StubSize = S.reserved2;
}
- // At a specific address, only keep the first relocation.
- if (AddrToReloc.find(Offset) == AddrToReloc.end())
- AddrToReloc[Offset] = *RI;
+ assert(StubSize && "Mach-O stub entry size can't be zero!");
+ StubsSec.getAddress(StubsStart);
+ StubsSec.getSize(StubsCount);
+ StubsCount /= StubSize;
+ }
+ }
+}
+
+StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
+ // FIXME: also, this can all be done at the very beginning, by iterating over
+ // all stubs and creating the calls to outside functions. Is it worth it
+ // though?
+ if (!StubSize)
+ return StringRef();
+ uint64_t StubIdx = (Addr - StubsStart) / StubSize;
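+  // Note that if Addr is below StubsStart, the unsigned subtraction wraps
+  // around and the resulting index also fails the bounds check below.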
+ if (StubIdx >= StubsCount)
+ return StringRef();
+
+ uint32_t SymtabIdx =
+ MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx);
+
+ StringRef SymName;
+ symbol_iterator SI = MOOF->begin_symbols();
+ error_code ec;
+ for (uint32_t i = 0; i != SymtabIdx; ++i) {
+ SI.increment(ec);
+ }
+  assert(SI != MOOF->end_symbols() && "Stub wasn't found in the symbol table!");
+  SI->getName(SymName);
+ assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!");
+ return SymName.substr(1);
+}
+
+void MCMachObjectSymbolizer::
+tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+ uint64_t Address) {
+ if (const RelocationRef *R = findRelocationAt(Address)) {
+ const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R);
+ if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false)
+ return;
+ }
+ uint64_t Addr = Value;
+ if (const SectionRef *S = findSectionContaining(Addr)) {
+ StringRef Name; S->getName(Name);
+ uint64_t SAddr; S->getAddress(SAddr);
+ if (Name == "__cstring") {
+ StringRef Contents;
+ S->getContents(Contents);
+ Contents = Contents.substr(Addr - SAddr);
+ cStream << " ## literal pool for: "
+ << Contents.substr(0, Contents.find_first_of(0));
}
}
}
+//===- MCObjectSymbolizer -------------------------------------------------===//
+
+MCObjectSymbolizer::MCObjectSymbolizer(MCContext &Ctx,
+ OwningPtr<MCRelocationInfo> &RelInfo,
+ const ObjectFile *Obj)
+ : MCSymbolizer(Ctx, RelInfo), Obj(Obj), SortedSections(), AddrToReloc() {
+}
+
bool MCObjectSymbolizer::
tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream,
int64_t Value, uint64_t Address, bool IsBranch,
uint64_t Offset, uint64_t InstSize) {
- AddrToRelocMap::iterator RI = AddrToReloc.find(Address + Offset);
- if (RI != AddrToReloc.end()) {
- if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(RI->second)) {
+ if (IsBranch) {
+ StringRef ExtFnName = findExternalFunctionAt((uint64_t)Value);
+ if (!ExtFnName.empty()) {
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(ExtFnName);
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+ return true;
+ }
+ }
+
+ if (const RelocationRef *R = findRelocationAt(Address + Offset)) {
+ if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R)) {
MI.addOperand(MCOperand::CreateExpr(RelExpr));
return true;
}
@@ -133,10 +160,8 @@ tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream,
uint64_t UValue = Value;
// FIXME: map instead of looping each time?
error_code ec;
- for (symbol_iterator SI = Obj->begin_symbols(),
- SE = Obj->end_symbols();
- SI != SE;
- SI.increment(ec)) {
+ for (symbol_iterator SI = Obj->begin_symbols(), SE = Obj->end_symbols();
+ SI != SE; SI.increment(ec)) {
if (ec) break;
uint64_t SymAddr; SI->getAddress(SymAddr);
uint64_t SymSize; SI->getSize(SymSize);
@@ -166,13 +191,16 @@ tryAddingPcLoadReferenceComment(raw_ostream &cStream,
int64_t Value, uint64_t Address) {
}
+StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
+ return StringRef();
+}
+
MCObjectSymbolizer *
MCObjectSymbolizer::createObjectSymbolizer(MCContext &Ctx,
OwningPtr<MCRelocationInfo> &RelInfo,
const ObjectFile *Obj) {
- if (const MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(Obj)) {
- return new MCMachObjectSymbolizer(Ctx, RelInfo, MachOOF);
- }
+ if (const MachOObjectFile *MOOF = dyn_cast<MachOObjectFile>(Obj))
+ return new MCMachObjectSymbolizer(Ctx, RelInfo, MOOF);
return new MCObjectSymbolizer(Ctx, RelInfo, Obj);
}
@@ -183,32 +211,100 @@ static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) {
return SAddr < Addr;
}
-MCObjectSymbolizer::SortedSectionList::const_iterator
-MCObjectSymbolizer::findSectionContaining(uint64_t Addr) const {
- SortedSectionList::const_iterator
+const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) {
+ if (SortedSections.empty())
+ buildSectionList();
+
+ SortedSectionList::iterator
EndIt = SortedSections.end(),
It = std::lower_bound(SortedSections.begin(), EndIt,
Addr, SectionStartsBefore);
if (It == EndIt)
- return It;
+ return 0;
uint64_t SAddr; It->getAddress(SAddr);
uint64_t SSize; It->getSize(SSize);
if (Addr >= SAddr + SSize)
- return EndIt;
- return It;
+ return 0;
+ return &*It;
+}
+
+const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) {
+ if (AddrToReloc.empty())
+ buildRelocationByAddrMap();
+
+ AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr);
+ if (RI == AddrToReloc.end())
+ return 0;
+ return &RI->second;
+}
+
+void MCObjectSymbolizer::buildSectionList() {
+ error_code ec;
+ for (section_iterator SI = Obj->begin_sections(), SE = Obj->end_sections();
+ SI != SE; SI.increment(ec)) {
+ if (ec) break;
+
+ bool RequiredForExec; SI->isRequiredForExecution(RequiredForExec);
+ if (RequiredForExec == false)
+ continue;
+ uint64_t SAddr; SI->getAddress(SAddr);
+ uint64_t SSize; SI->getSize(SSize);
+ SortedSectionList::iterator It = std::lower_bound(SortedSections.begin(),
+ SortedSections.end(),
+ SAddr,
+ SectionStartsBefore);
+ if (It != SortedSections.end()) {
+ uint64_t FoundSAddr; It->getAddress(FoundSAddr);
+ if (FoundSAddr < SAddr + SSize)
+ llvm_unreachable("Inserting overlapping sections");
+ }
+ SortedSections.insert(It, *SI);
+ }
}
-void MCObjectSymbolizer::insertSection(SectionRef Sec) {
- uint64_t SAddr; Sec.getAddress(SAddr);
- uint64_t SSize; Sec.getSize(SSize);
- SortedSectionList::iterator It = std::lower_bound(SortedSections.begin(),
- SortedSections.end(),
- SAddr,
- SectionStartsBefore);
- if (It != SortedSections.end()) {
- uint64_t FoundSAddr; It->getAddress(FoundSAddr);
- if (FoundSAddr < SAddr + SSize)
- llvm_unreachable("Inserting overlapping sections");
+void MCObjectSymbolizer::buildRelocationByAddrMap() {
+ error_code ec;
+ for (section_iterator SI = Obj->begin_sections(), SE = Obj->end_sections();
+ SI != SE; SI.increment(ec)) {
+ if (ec) break;
+
+ section_iterator RelSecI = SI->getRelocatedSection();
+ if (RelSecI == Obj->end_sections())
+ continue;
+
+ uint64_t StartAddr; RelSecI->getAddress(StartAddr);
+ uint64_t Size; RelSecI->getSize(Size);
+ bool RequiredForExec; RelSecI->isRequiredForExecution(RequiredForExec);
+ if (RequiredForExec == false || Size == 0)
+ continue;
+ for (relocation_iterator RI = SI->begin_relocations(),
+ RE = SI->end_relocations();
+ RI != RE;
+ RI.increment(ec)) {
+ if (ec) break;
+ // FIXME: libObject is inconsistent regarding error handling. The
+ // overwhelming majority of methods always return object_error::success,
+      // and assert for simple errors. Here, ELFObjectFile::getRelocationOffset
+ // asserts when the file type isn't ET_REL.
+ // This workaround handles x86-64 elf, the only one that has a relocinfo.
+ uint64_t Offset;
+ if (Obj->isELF()) {
+ const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj);
+ if (ELFObj == 0)
+ break;
+ if (ELFObj->getELFFile()->getHeader()->e_type == ELF::ET_REL) {
+ RI->getOffset(Offset);
+ Offset += StartAddr;
+ } else {
+ RI->getAddress(Offset);
+ }
+ } else {
+ RI->getOffset(Offset);
+ Offset += StartAddr;
+ }
+ // At a specific address, only keep the first relocation.
+ if (AddrToReloc.find(Offset) == AddrToReloc.end())
+ AddrToReloc[Offset] = *RI;
+ }
}
- SortedSections.insert(It, Sec);
}
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index c1c594a..b49dd01 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -91,9 +91,56 @@ AsmToken AsmLexer::LexFloatLiteral() {
StringRef(TokStart, CurPtr - TokStart));
}
-/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
+/// while making sure there are enough actual digits around for the constant to
+/// be valid.
+///
+/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
+/// before we get here.
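+///
+/// For example, "0x1.8p3", "0x.8p-2" and "0x4p0" are accepted, while "0xp3"
+/// (no significand digits) and "0x1.8" (no exponent) are rejected.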
+AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
+ assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
+ "unexpected parse state in floating hex");
+ bool NoFracDigits = true;
+
+ // Skip the fractional part if there is one
+ if (*CurPtr == '.') {
+ ++CurPtr;
+
+ const char *FracStart = CurPtr;
+ while (isxdigit(*CurPtr))
+ ++CurPtr;
+
+ NoFracDigits = CurPtr == FracStart;
+ }
+
+ if (NoIntDigits && NoFracDigits)
+ return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
+ "expected at least one significand digit");
+
+ // Make sure we do have some kind of proper exponent part
+ if (*CurPtr != 'p' && *CurPtr != 'P')
+ return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
+ "expected exponent part 'p'");
+ ++CurPtr;
+
+ if (*CurPtr == '+' || *CurPtr == '-')
+ ++CurPtr;
+
+ // N.b. exponent digits are *not* hex
+ const char *ExpStart = CurPtr;
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+
+ if (CurPtr == ExpStart)
+ return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
+ "expected at least one exponent digit");
+
+ return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
static bool IsIdentifierChar(char c) {
- return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+ return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?';
}
AsmToken AsmLexer::LexIdentifier() {
// Check for floating point literals.
@@ -265,7 +312,12 @@ AsmToken AsmLexer::LexDigit() {
while (isxdigit(CurPtr[0]))
++CurPtr;
- // Requires at least one hex digit.
+ // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
+ // diagnosed by LexHexFloatLiteral).
+ if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
+ return LexHexFloatLiteral(NumStart == CurPtr);
+
+ // Otherwise requires at least one hex digit.
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "invalid hexadecimal number");
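
The hex floating-point lexing added above accepts forms such as "0x1p4", "0x.8p-1" and "0x1.8P+2", and rejects "0xp3" (no significand digits) and "0x1.8" (no exponent). A rough standalone sketch of the same acceptance rules, assuming a NUL-terminated token (isHexFloatLiteral is a hypothetical helper, not part of the lexer):

#include <cctype>

// Returns true if S is a complete hex float literal: "0x", optional hex
// integer digits, optional "." plus hex fraction digits, then a mandatory
// 'p'/'P' exponent with an optional sign and at least one *decimal* digit.
// At least one significand digit (integer or fraction) must be present.
static bool isHexFloatLiteral(const char *S) {
  if (S[0] != '0' || (S[1] != 'x' && S[1] != 'X'))
    return false;
  const char *P = S + 2;
  bool HasDigits = false;
  while (isxdigit((unsigned char)*P)) { ++P; HasDigits = true; }   // integer part
  if (*P == '.') {
    ++P;
    while (isxdigit((unsigned char)*P)) { ++P; HasDigits = true; } // fraction part
  }
  if (!HasDigits)                  // e.g. "0xp3": no significand digits
    return false;
  if (*P != 'p' && *P != 'P')      // exponent part is mandatory
    return false;
  ++P;
  if (*P == '+' || *P == '-')
    ++P;
  if (!isdigit((unsigned char)*P)) // need at least one exponent digit
    return false;
  while (isdigit((unsigned char)*P))
    ++P;
  return *P == '\0';
}
// e.g. isHexFloatLiteral("0x1.8p-3") == true, isHexFloatLiteral("0xp3") == false
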
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index dd0d181..a91bd93 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -94,13 +94,13 @@ public:
};
struct ParseStatementInfo {
- /// ParsedOperands - The parsed operands from the last parsed statement.
+ /// \brief The parsed operands from the last parsed statement.
SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
- /// Opcode - The opcode from the last parsed instruction.
+ /// \brief The opcode from the last parsed instruction.
unsigned Opcode;
- /// Error - Was there an error parsing the inline assembly?
+ /// \brief Was there an error parsing the inline assembly?
bool ParseError;
SmallVectorImpl<AsmRewrite> *AsmRewrites;
@@ -138,18 +138,18 @@ private:
AsmCond TheCondState;
std::vector<AsmCond> TheCondStack;
- /// ExtensionDirectiveMap - maps directive names to handler methods in parser
+ /// \brief Maps directive names to handler methods in parser
/// extensions. Extensions register themselves in this map by calling
/// addDirectiveHandler.
StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
- /// MacroMap - Map of currently defined macros.
+ /// \brief Map of currently defined macros.
StringMap<MCAsmMacro*> MacroMap;
- /// ActiveMacros - Stack of active macro instantiations.
+ /// \brief Stack of active macro instantiations.
std::vector<MacroInstantiation*> ActiveMacros;
- /// MacroLikeBodies - List of bodies of anonymous macros.
+ /// \brief List of bodies of anonymous macros.
std::deque<MCAsmMacro> MacroLikeBodies;
/// Boolean tracking whether macro substitution is enabled.
@@ -165,7 +165,7 @@ private:
int CppHashBuf;
/// When generating dwarf for assembly source files we need to calculate the
/// logical line number based on the last parsed cpp hash file line comment
- /// and current line. Since this is slow and messes up the SourceMgr's
+ /// and current line. Since this is slow and messes up the SourceMgr's
/// cache we save the last info we queried with SrcMgr.FindLineNumber().
SMLoc LastQueryIDLoc;
int LastQueryBuffer;
@@ -174,10 +174,10 @@ private:
  /// AssemblerDialect. ~0U means unset value and use the value provided by MAI.
unsigned AssemblerDialect;
- /// IsDarwin - is Darwin compatibility enabled?
+ /// \brief Is Darwin compatibility enabled?
bool IsDarwin;
- /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
+ /// \brief Are we parsing ms-style inline assembly?
bool ParsingInlineAsm;
public:
@@ -235,7 +235,7 @@ public:
virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
virtual bool parseAbsoluteExpression(int64_t &Res);
- /// parseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// \brief Parse an identifier or string (as a quoted identifier)
/// and set \p Res to the identifier contents.
virtual bool parseIdentifier(StringRef &Res);
virtual void eatToEndOfStatement();
@@ -245,11 +245,11 @@ public:
private:
- bool ParseStatement(ParseStatementInfo &Info);
- void EatToEndOfLine();
- bool ParseCppHashLineFilenameComment(const SMLoc &L);
+ bool parseStatement(ParseStatementInfo &Info);
+ void eatToEndOfLine();
+ bool parseCppHashLineFilenameComment(const SMLoc &L);
- void CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
+ void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
MCAsmMacroParameters Parameters);
bool expandMacro(raw_svector_ostream &OS, StringRef Body,
const MCAsmMacroParameters &Parameters,
@@ -257,55 +257,56 @@ private:
const SMLoc &L);
/// \brief Are macros enabled in the parser?
- bool MacrosEnabled() {return MacrosEnabledFlag;}
+ bool areMacrosEnabled() {return MacrosEnabledFlag;}
/// \brief Control a flag in the parser that enables or disables macros.
- void SetMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;}
+ void setMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;}
/// \brief Lookup a previously defined macro.
/// \param Name Macro name.
/// \returns Pointer to macro. NULL if no such macro was defined.
- const MCAsmMacro* LookupMacro(StringRef Name);
+ const MCAsmMacro* lookupMacro(StringRef Name);
/// \brief Define a new macro with the given name and information.
- void DefineMacro(StringRef Name, const MCAsmMacro& Macro);
+ void defineMacro(StringRef Name, const MCAsmMacro& Macro);
/// \brief Undefine a macro. If no such macro was defined, it's a no-op.
- void UndefineMacro(StringRef Name);
+ void undefineMacro(StringRef Name);
/// \brief Are we inside a macro instantiation?
- bool InsideMacroInstantiation() {return !ActiveMacros.empty();}
+ bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
- /// \brief Handle entry to macro instantiation.
+ /// \brief Handle entry to macro instantiation.
///
/// \param M The macro.
/// \param NameLoc Instantiation location.
- bool HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc);
+ bool handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc);
/// \brief Handle exit from macro instantiation.
- void HandleMacroExit();
+ void handleMacroExit();
/// \brief Extract AsmTokens for a macro argument. If the argument delimiter
/// is initially unknown, set it to AsmToken::Eof. It will be set to the
/// correct delimiter by the method.
- bool ParseMacroArgument(MCAsmMacroArgument &MA,
+ bool parseMacroArgument(MCAsmMacroArgument &MA,
AsmToken::TokenKind &ArgumentDelimiter);
/// \brief Parse all macro arguments for a given macro.
- bool ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A);
+ bool parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A);
- void PrintMacroInstantiations();
- void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
+ void printMacroInstantiations();
+ void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
ArrayRef<SMRange> Ranges = None) const {
SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
}
static void DiagHandler(const SMDiagnostic &Diag, void *Context);
- /// EnterIncludeFile - Enter the specified file. This returns true on failure.
- bool EnterIncludeFile(const std::string &Filename);
- /// ProcessIncbinFile - Process the specified file for the .incbin directive.
+ /// \brief Enter the specified file. This returns true on failure.
+ bool enterIncludeFile(const std::string &Filename);
+
+ /// \brief Process the specified file for the .incbin directive.
/// This returns true on failure.
- bool ProcessIncbinFile(const std::string &Filename);
+ bool processIncbinFile(const std::string &Filename);
/// \brief Reset the current lexer position to that given by \p Loc. The
/// current token is not set; clients should ensure Lex() is called
@@ -313,7 +314,7 @@ private:
///
/// \param InBuffer If not -1, should be the known buffer id that contains the
/// location.
- void JumpToLoc(SMLoc Loc, int InBuffer=-1);
+ void jumpToLoc(SMLoc Loc, int InBuffer=-1);
  /// \brief Parse up to the end of statement and return the contents from the
/// current token until the end of the statement; the current token on exit
@@ -322,17 +323,16 @@ private:
/// \brief Parse until the end of a statement or a comma is encountered,
/// return the contents from the current token up to the end or comma.
- StringRef ParseStringToComma();
+ StringRef parseStringToComma();
- bool ParseAssignment(StringRef Name, bool allow_redef,
+ bool parseAssignment(StringRef Name, bool allow_redef,
bool NoDeadStrip = false);
- bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
- bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
- bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
- bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
+ bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
- bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+ bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
// Generic (target and platform independent) directive parsing.
enum DirectiveKind {
@@ -342,7 +342,7 @@ private:
DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW,
DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR,
DK_BUNDLE_ALIGN_MODE, DK_BUNDLE_LOCK, DK_BUNDLE_UNLOCK,
- DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL, DK_INDIRECT_SYMBOL,
+ DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL,
DK_LAZY_REFERENCE, DK_NO_DEAD_STRIP, DK_SYMBOL_RESOLVER, DK_PRIVATE_EXTERN,
DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE,
DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT,
@@ -355,112 +355,113 @@ private:
DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, DK_CFI_LSDA,
DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE,
DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED,
- DK_CFI_REGISTER,
+ DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE,
DK_MACROS_ON, DK_MACROS_OFF, DK_MACRO, DK_ENDM, DK_ENDMACRO, DK_PURGEM,
DK_SLEB128, DK_ULEB128
};
- /// DirectiveKindMap - Maps directive name --> DirectiveKind enum, for
+ /// \brief Maps directive name --> DirectiveKind enum, for
/// directives parsed by this class.
StringMap<DirectiveKind> DirectiveKindMap;
// ".ascii", ".asciz", ".string"
- bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
- bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
- bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ...
- bool ParseDirectiveFill(); // ".fill"
- bool ParseDirectiveZero(); // ".zero"
+ bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
+ bool parseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+ bool parseDirectiveRealValue(const fltSemantics &); // ".single", ...
+ bool parseDirectiveFill(); // ".fill"
+ bool parseDirectiveZero(); // ".zero"
// ".set", ".equ", ".equiv"
- bool ParseDirectiveSet(StringRef IDVal, bool allow_redef);
- bool ParseDirectiveOrg(); // ".org"
+ bool parseDirectiveSet(StringRef IDVal, bool allow_redef);
+ bool parseDirectiveOrg(); // ".org"
// ".align{,32}", ".p2align{,w,l}"
- bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+ bool parseDirectiveAlign(bool IsPow2, unsigned ValueSize);
// ".file", ".line", ".loc", ".stabs"
- bool ParseDirectiveFile(SMLoc DirectiveLoc);
- bool ParseDirectiveLine();
- bool ParseDirectiveLoc();
- bool ParseDirectiveStabs();
+ bool parseDirectiveFile(SMLoc DirectiveLoc);
+ bool parseDirectiveLine();
+ bool parseDirectiveLoc();
+ bool parseDirectiveStabs();
// .cfi directives
- bool ParseDirectiveCFIRegister(SMLoc DirectiveLoc);
- bool ParseDirectiveCFISections();
- bool ParseDirectiveCFIStartProc();
- bool ParseDirectiveCFIEndProc();
- bool ParseDirectiveCFIDefCfaOffset();
- bool ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
- bool ParseDirectiveCFIAdjustCfaOffset();
- bool ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
- bool ParseDirectiveCFIOffset(SMLoc DirectiveLoc);
- bool ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
- bool ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
- bool ParseDirectiveCFIRememberState();
- bool ParseDirectiveCFIRestoreState();
- bool ParseDirectiveCFISameValue(SMLoc DirectiveLoc);
- bool ParseDirectiveCFIRestore(SMLoc DirectiveLoc);
- bool ParseDirectiveCFIEscape();
- bool ParseDirectiveCFISignalFrame();
- bool ParseDirectiveCFIUndefined(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIWindowSave();
+ bool parseDirectiveCFISections();
+ bool parseDirectiveCFIStartProc();
+ bool parseDirectiveCFIEndProc();
+ bool parseDirectiveCFIDefCfaOffset();
+ bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIAdjustCfaOffset();
+ bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
+ bool parseDirectiveCFIRememberState();
+ bool parseDirectiveCFIRestoreState();
+ bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIEscape();
+ bool parseDirectiveCFISignalFrame();
+ bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
// macro directives
- bool ParseDirectivePurgeMacro(SMLoc DirectiveLoc);
- bool ParseDirectiveEndMacro(StringRef Directive);
- bool ParseDirectiveMacro(SMLoc DirectiveLoc);
- bool ParseDirectiveMacrosOnOff(StringRef Directive);
+ bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
+ bool parseDirectiveEndMacro(StringRef Directive);
+ bool parseDirectiveMacro(SMLoc DirectiveLoc);
+ bool parseDirectiveMacrosOnOff(StringRef Directive);
// ".bundle_align_mode"
- bool ParseDirectiveBundleAlignMode();
+ bool parseDirectiveBundleAlignMode();
// ".bundle_lock"
- bool ParseDirectiveBundleLock();
+ bool parseDirectiveBundleLock();
// ".bundle_unlock"
- bool ParseDirectiveBundleUnlock();
+ bool parseDirectiveBundleUnlock();
// ".space", ".skip"
- bool ParseDirectiveSpace(StringRef IDVal);
+ bool parseDirectiveSpace(StringRef IDVal);
// .sleb128 (Signed=true) and .uleb128 (Signed=false)
- bool ParseDirectiveLEB128(bool Signed);
+ bool parseDirectiveLEB128(bool Signed);
- /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
+ /// \brief Parse a directive like ".globl" which
/// accepts a single symbol (which should be a label or an external).
- bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+ bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
- bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+ bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
- bool ParseDirectiveAbort(); // ".abort"
- bool ParseDirectiveInclude(); // ".include"
- bool ParseDirectiveIncbin(); // ".incbin"
+ bool parseDirectiveAbort(); // ".abort"
+ bool parseDirectiveInclude(); // ".include"
+ bool parseDirectiveIncbin(); // ".incbin"
- bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+ bool parseDirectiveIf(SMLoc DirectiveLoc); // ".if"
// ".ifb" or ".ifnb", depending on ExpectBlank.
- bool ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
+ bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// ".ifc" or ".ifnc", depending on ExpectEqual.
- bool ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual);
+ bool parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual);
// ".ifdef" or ".ifndef", depending on expect_defined
- bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
- bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
- bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
- bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
+ bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
+ bool parseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+ bool parseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+ bool parseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
virtual bool parseEscapedString(std::string &Data);
- const MCExpr *ApplyModifierToExpr(const MCExpr *E,
+ const MCExpr *applyModifierToExpr(const MCExpr *E,
MCSymbolRefExpr::VariantKind Variant);
// Macro-like directives
- MCAsmMacro *ParseMacroLikeBody(SMLoc DirectiveLoc);
- void InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
+ MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
+ void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
raw_svector_ostream &OS);
- bool ParseDirectiveRept(SMLoc DirectiveLoc); // ".rept"
- bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp"
- bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc"
- bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr"
+ bool parseDirectiveRept(SMLoc DirectiveLoc); // ".rept"
+ bool parseDirectiveIrp(SMLoc DirectiveLoc); // ".irp"
+ bool parseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc"
+ bool parseDirectiveEndr(SMLoc DirectiveLoc); // ".endr"
// "_emit" or "__emit"
- bool ParseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
+ bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
size_t Len);
// "align"
- bool ParseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
+ bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
void initializeDirectiveKindMap();
};
@@ -476,12 +477,12 @@ extern MCAsmParserExtension *createCOFFAsmParser();
enum { DEFAULT_ADDRSPACE = 0 };
-AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
- MCStreamer &_Out, const MCAsmInfo &_MAI)
- : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
- PlatformParser(0),
- CurBuffer(0), MacrosEnabledFlag(true), CppHashLineNumber(0),
- AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) {
+AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
+ const MCAsmInfo &_MAI)
+ : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
+ PlatformParser(0), CurBuffer(0), MacrosEnabledFlag(true),
+ CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false),
+ ParsingInlineAsm(false) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
@@ -512,37 +513,40 @@ AsmParser::~AsmParser() {
assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
// Destroy any macros.
- for (StringMap<MCAsmMacro*>::iterator it = MacroMap.begin(),
- ie = MacroMap.end(); it != ie; ++it)
+ for (StringMap<MCAsmMacro *>::iterator it = MacroMap.begin(),
+ ie = MacroMap.end();
+ it != ie; ++it)
delete it->getValue();
delete PlatformParser;
}
-void AsmParser::PrintMacroInstantiations() {
+void AsmParser::printMacroInstantiations() {
// Print the active macro instantiation stack.
- for (std::vector<MacroInstantiation*>::const_reverse_iterator
- it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
- PrintMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
+ for (std::vector<MacroInstantiation *>::const_reverse_iterator
+ it = ActiveMacros.rbegin(),
+ ie = ActiveMacros.rend();
+ it != ie; ++it)
+ printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
"while in macro instantiation");
}
bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
if (FatalAssemblerWarnings)
return Error(L, Msg, Ranges);
- PrintMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
- PrintMacroInstantiations();
+ printMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
+ printMacroInstantiations();
return false;
}
bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
HadError = true;
- PrintMessage(L, SourceMgr::DK_Error, Msg, Ranges);
- PrintMacroInstantiations();
+ printMessage(L, SourceMgr::DK_Error, Msg, Ranges);
+ printMacroInstantiations();
return true;
}
-bool AsmParser::EnterIncludeFile(const std::string &Filename) {
+bool AsmParser::enterIncludeFile(const std::string &Filename) {
std::string IncludedFile;
int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
if (NewBuf == -1)
@@ -558,7 +562,7 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) {
/// Process the specified .incbin file by searching for it in the include paths
/// then just emitting the byte contents of the file to the streamer. This
/// returns true on failure.
-bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
+bool AsmParser::processIncbinFile(const std::string &Filename) {
std::string IncludedFile;
int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
if (NewBuf == -1)
@@ -569,7 +573,7 @@ bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
return false;
}
-void AsmParser::JumpToLoc(SMLoc Loc, int InBuffer) {
+void AsmParser::jumpToLoc(SMLoc Loc, int InBuffer) {
if (InBuffer != -1) {
CurBuffer = InBuffer;
} else {
@@ -586,7 +590,7 @@ const AsmToken &AsmParser::Lex() {
// include stack.
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
- JumpToLoc(ParentIncludeLoc);
+ jumpToLoc(ParentIncludeLoc);
tok = &Lexer.Lex();
}
}
@@ -623,7 +627,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
ParseStatementInfo Info;
- if (!ParseStatement(Info)) continue;
+ if (!parseStatement(Info))
+ continue;
// We had an error, validate that one was emitted and recover by skipping to
// the next line.
@@ -637,7 +642,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Check to see there are no empty DwarfFile slots.
const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
- getContext().getMCDwarfFiles();
+ getContext().getMCDwarfFiles();
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
if (!MCDwarfFiles[i])
TokError("unassigned file number: " + Twine(i) + " for .file directives");
@@ -650,7 +655,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
const MCContext::SymbolTable &Symbols = getContext().getSymbols();
for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
- e = Symbols.end();
+ e = Symbols.end();
i != e; ++i) {
MCSymbol *Sym = i->getValue();
// Variable symbols may not be marked as defined, so check those
@@ -660,13 +665,12 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// FIXME: We would really like to refer back to where the symbol was
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
- PrintMessage(getLexer().getLoc(), SourceMgr::DK_Error,
- "assembler local symbol '" + Sym->getName() +
- "' not defined");
+ printMessage(
+ getLexer().getLoc(), SourceMgr::DK_Error,
+ "assembler local symbol '" + Sym->getName() + "' not defined");
}
}
-
// Finalize the output stream if there are no errors and if the client wants
// us to.
if (!HadError && !NoFinalize)
@@ -682,10 +686,9 @@ void AsmParser::checkForValidSection() {
}
}
-/// eatToEndOfStatement - Throw away the rest of the line for testing purposes.
+/// \brief Throw away the rest of the line for testing purposes.
void AsmParser::eatToEndOfStatement() {
- while (Lexer.isNot(AsmToken::EndOfStatement) &&
- Lexer.isNot(AsmToken::Eof))
+ while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
Lex();
// Eat EOL.
@@ -696,33 +699,32 @@ void AsmParser::eatToEndOfStatement() {
StringRef AsmParser::parseStringToEndOfStatement() {
const char *Start = getTok().getLoc().getPointer();
- while (Lexer.isNot(AsmToken::EndOfStatement) &&
- Lexer.isNot(AsmToken::Eof))
+ while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
Lex();
const char *End = getTok().getLoc().getPointer();
return StringRef(Start, End - Start);
}
-StringRef AsmParser::ParseStringToComma() {
+StringRef AsmParser::parseStringToComma() {
const char *Start = getTok().getLoc().getPointer();
while (Lexer.isNot(AsmToken::EndOfStatement) &&
- Lexer.isNot(AsmToken::Comma) &&
- Lexer.isNot(AsmToken::Eof))
+ Lexer.isNot(AsmToken::Comma) && Lexer.isNot(AsmToken::Eof))
Lex();
const char *End = getTok().getLoc().getPointer();
return StringRef(Start, End - Start);
}
-/// ParseParenExpr - Parse a paren expression and return it.
+/// \brief Parse a paren expression and return it.
/// NOTE: This assumes the leading '(' has already been consumed.
///
/// parenexpr ::= expr)
///
-bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
- if (parseExpression(Res)) return true;
+bool AsmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (parseExpression(Res))
+ return true;
if (Lexer.isNot(AsmToken::RParen))
return TokError("expected ')' in parentheses expression");
EndLoc = Lexer.getTok().getEndLoc();
@@ -730,13 +732,14 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return false;
}
-/// ParseBracketExpr - Parse a bracket expression and return it.
+/// \brief Parse a bracket expression and return it.
/// NOTE: This assumes the leading '[' has already been consumed.
///
/// bracketexpr ::= expr]
///
-bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
- if (parseExpression(Res)) return true;
+bool AsmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (parseExpression(Res))
+ return true;
if (Lexer.isNot(AsmToken::RBrac))
return TokError("expected ']' in brackets expression");
EndLoc = Lexer.getTok().getEndLoc();
@@ -744,13 +747,13 @@ bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return false;
}
-/// ParsePrimaryExpr - Parse a primary expression and return it.
+/// \brief Parse a primary expression and return it.
/// primaryexpr ::= (parenexpr
/// primaryexpr ::= symbol
/// primaryexpr ::= number
/// primaryexpr ::= '.'
/// primaryexpr ::= ~,+,- primaryexpr
-bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
SMLoc FirstTokenLoc = getLexer().getLoc();
AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
switch (FirstTokenKind) {
@@ -761,36 +764,54 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return true;
case AsmToken::Exclaim:
Lex(); // Eat the operator.
- if (ParsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc))
return true;
Res = MCUnaryExpr::CreateLNot(Res, getContext());
return false;
case AsmToken::Dollar:
+ case AsmToken::At:
case AsmToken::String:
case AsmToken::Identifier: {
StringRef Identifier;
if (parseIdentifier(Identifier)) {
- if (FirstTokenKind == AsmToken::Dollar)
- return Error(FirstTokenLoc, "invalid token in expression");
- return true;
+ if (FirstTokenKind == AsmToken::Dollar) {
+ if (Lexer.getMAI().getDollarIsPC()) {
+ // This is a '$' reference, which references the current PC. Emit a
+ // temporary label to the streamer and refer to it.
+ MCSymbol *Sym = Ctx.CreateTempSymbol();
+ Out.EmitLabel(Sym);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ getContext());
+ EndLoc = FirstTokenLoc;
+ return false;
+ } else
+ return Error(FirstTokenLoc, "invalid token in expression");
+ return true;
+ }
}
EndLoc = SMLoc::getFromPointer(Identifier.end());
// This is a symbol reference.
+ StringRef SymbolName = Identifier;
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
std::pair<StringRef, StringRef> Split = Identifier.split('@');
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
// Lookup the symbol variant if used.
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
if (Split.first.size() != Identifier.size()) {
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
- if (Variant == MCSymbolRefExpr::VK_Invalid) {
+ if (Variant != MCSymbolRefExpr::VK_Invalid) {
+ SymbolName = Split.first;
+ } else if (MAI.doesAllowAtInName()) {
+ Variant = MCSymbolRefExpr::VK_None;
+ } else {
Variant = MCSymbolRefExpr::VK_None;
return TokError("invalid variant '" + Split.second + "'");
}
}
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
@@ -823,11 +844,11 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Variant = MCSymbolRefExpr::VK_None;
return TokError("invalid variant '" + Split.second + "'");
}
- IDVal = Split.first;
+ IDVal = Split.first;
}
- if (IDVal == "f" || IDVal == "b"){
- MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
- IDVal == "f" ? 1 : 0);
+ if (IDVal == "f" || IDVal == "b") {
+ MCSymbol *Sym =
+ Ctx.GetDirectionalLocalSymbol(IntVal, IDVal == "f" ? 1 : 0);
Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
@@ -857,27 +878,27 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
case AsmToken::LParen:
Lex(); // Eat the '('.
- return ParseParenExpr(Res, EndLoc);
+ return parseParenExpr(Res, EndLoc);
case AsmToken::LBrac:
if (!PlatformParser->HasBracketExpressions())
return TokError("brackets expression not supported on this target");
Lex(); // Eat the '['.
- return ParseBracketExpr(Res, EndLoc);
+ return parseBracketExpr(Res, EndLoc);
case AsmToken::Minus:
Lex(); // Eat the operator.
- if (ParsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc))
return true;
Res = MCUnaryExpr::CreateMinus(Res, getContext());
return false;
case AsmToken::Plus:
Lex(); // Eat the operator.
- if (ParsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc))
return true;
Res = MCUnaryExpr::CreatePlus(Res, getContext());
return false;
case AsmToken::Tilde:
Lex(); // Eat the operator.
- if (ParsePrimaryExpr(Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc))
return true;
Res = MCUnaryExpr::CreateNot(Res, getContext());
return false;
@@ -889,13 +910,13 @@ bool AsmParser::parseExpression(const MCExpr *&Res) {
return parseExpression(Res, EndLoc);
}
-bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
- return ParsePrimaryExpr(Res, EndLoc);
-}
-
const MCExpr *
-AsmParser::ApplyModifierToExpr(const MCExpr *E,
+AsmParser::applyModifierToExpr(const MCExpr *E,
MCSymbolRefExpr::VariantKind Variant) {
+ // Ask the target implementation about this expression first.
+ const MCExpr *NewE = getTargetParser().applyModifierToExpr(E, Variant, Ctx);
+ if (NewE)
+ return NewE;
// Recurse over the given expression, rebuilding it to apply the given variant
// if there is exactly one symbol.
switch (E->getKind()) {
@@ -907,8 +928,8 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
if (SRE->getKind() != MCSymbolRefExpr::VK_None) {
- TokError("invalid variant on expression '" +
- getTok().getIdentifier() + "' (already modified)");
+ TokError("invalid variant on expression '" + getTok().getIdentifier() +
+ "' (already modified)");
return E;
}
@@ -917,7 +938,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
case MCExpr::Unary: {
const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
- const MCExpr *Sub = ApplyModifierToExpr(UE->getSubExpr(), Variant);
+ const MCExpr *Sub = applyModifierToExpr(UE->getSubExpr(), Variant);
if (!Sub)
return 0;
return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
@@ -925,14 +946,16 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
case MCExpr::Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
- const MCExpr *LHS = ApplyModifierToExpr(BE->getLHS(), Variant);
- const MCExpr *RHS = ApplyModifierToExpr(BE->getRHS(), Variant);
+ const MCExpr *LHS = applyModifierToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = applyModifierToExpr(BE->getRHS(), Variant);
if (!LHS && !RHS)
return 0;
- if (!LHS) LHS = BE->getLHS();
- if (!RHS) RHS = BE->getRHS();
+ if (!LHS)
+ LHS = BE->getLHS();
+ if (!RHS)
+ RHS = BE->getRHS();
return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
}
@@ -941,7 +964,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
llvm_unreachable("Invalid expression kind!");
}
-/// parseExpression - Parse an expression and return it.
+/// \brief Parse an expression and return it.
///
/// expr ::= expr &&,|| expr -> lowest.
/// expr ::= expr |,^,&,! expr
@@ -954,7 +977,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
// Parse the expression.
Res = 0;
- if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
+ if (parsePrimaryExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc))
return true;
// As a special case, we support 'a op b @ modifier' by rewriting the
@@ -967,11 +990,11 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
return TokError("unexpected symbol modifier following '@'");
MCSymbolRefExpr::VariantKind Variant =
- MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
+ MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
if (Variant == MCSymbolRefExpr::VK_Invalid)
return TokError("invalid variant '" + getTok().getIdentifier() + "'");
- const MCExpr *ModifiedRes = ApplyModifierToExpr(Res, Variant);
+ const MCExpr *ModifiedRes = applyModifierToExpr(Res, Variant);
if (!ModifiedRes) {
return TokError("invalid modifier '" + getTok().getIdentifier() +
"' (no symbols present)");
@@ -991,8 +1014,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
Res = 0;
- return ParseParenExpr(Res, EndLoc) ||
- ParseBinOpRHS(1, Res, EndLoc);
+ return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
}
bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
@@ -1012,9 +1034,9 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
MCBinaryExpr::Opcode &Kind) {
switch (K) {
default:
- return 0; // not a binop.
+ return 0; // not a binop.
- // Lowest Precedence: &&, ||
+ // Lowest Precedence: &&, ||
case AsmToken::AmpAmp:
Kind = MCBinaryExpr::LAnd;
return 1;
@@ -1022,10 +1044,9 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::LOr;
return 1;
-
- // Low Precedence: |, &, ^
- //
- // FIXME: gas seems to support '!' as an infix operator?
+ // Low Precedence: |, &, ^
+ //
+ // FIXME: gas seems to support '!' as an infix operator?
case AsmToken::Pipe:
Kind = MCBinaryExpr::Or;
return 2;
@@ -1036,7 +1057,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::And;
return 2;
- // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
+ // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
case AsmToken::EqualEqual:
Kind = MCBinaryExpr::EQ;
return 3;
@@ -1057,7 +1078,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::GTE;
return 3;
- // Intermediate Precedence: <<, >>
+ // Intermediate Precedence: <<, >>
case AsmToken::LessLess:
Kind = MCBinaryExpr::Shl;
return 4;
@@ -1065,7 +1086,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::Shr;
return 4;
- // High Intermediate Precedence: +, -
+ // High Intermediate Precedence: +, -
case AsmToken::Plus:
Kind = MCBinaryExpr::Add;
return 5;
@@ -1073,7 +1094,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::Sub;
return 5;
- // Highest Precedence: *, /, %
+ // Highest Precedence: *, /, %
case AsmToken::Star:
Kind = MCBinaryExpr::Mul;
return 6;
@@ -1086,10 +1107,9 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
}
}
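
The precedence levels returned above (1 for &&/|| up through 6 for *, /, %) drive the precedence-climbing loop in parseBinOpRHS below. As a rough illustration of why that loop groups "1 + 2 * 3" as "1 + (2 * 3)", here is a minimal self-contained sketch over single-digit integers (the Toy type and its members are made up for the example and unrelated to MCExpr):

#include <cassert>
#include <cctype>

// Tiny precedence-climbing evaluator over single-digit numbers and '+', '-', '*'.
struct Toy {
  const char *P;
  static int prec(char Op) {
    return Op == '*' ? 6 : (Op == '+' || Op == '-') ? 5 : 0;
  }
  long primary() {
    assert(isdigit((unsigned char)*P) && "expected a digit");
    return *P++ - '0';
  }
  long binOpRHS(int MinPrec, long LHS) {
    while (true) {
      int TokPrec = prec(*P);
      if (TokPrec < MinPrec)          // next operator binds too weakly; hand LHS back
        return LHS;
      char Op = *P++;
      long RHS = primary();
      if (TokPrec < prec(*P))         // the following operator binds tighter:
        RHS = binOpRHS(TokPrec + 1, RHS); // fold it into RHS first
      LHS = Op == '*' ? LHS * RHS : Op == '+' ? LHS + RHS : LHS - RHS;
    }
  }
  long expr() { long LHS = primary(); return binOpRHS(1, LHS); }
};
// Toy E = { "1+2*3" }; E.expr() == 7, because '*' (6) outranks '+' (5).
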
-
-/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
+/// \brief Parse all binary operators with precedence >= 'Precedence'.
/// Res contains the LHS of the expression on input.
-bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
+bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
SMLoc &EndLoc) {
while (1) {
MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
@@ -1104,15 +1124,15 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
// Eat the next primary expression.
const MCExpr *RHS;
- if (ParsePrimaryExpr(RHS, EndLoc)) return true;
+ if (parsePrimaryExpr(RHS, EndLoc))
+ return true;
// If BinOp binds less tightly with RHS than the operator after RHS, let
// the pending operator take RHS as its LHS.
MCBinaryExpr::Opcode Dummy;
unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
- if (TokPrec < NextTokPrec) {
- if (ParseBinOpRHS(TokPrec+1, RHS, EndLoc)) return true;
- }
+ if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
+ return true;
// Merge LHS and RHS according to operator.
Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
@@ -1123,7 +1143,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
/// ::= EndOfStatement
/// ::= Label* Directive ...Operands... EndOfStatement
/// ::= Label* Identifier OperandList* EndOfStatement
-bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
+bool AsmParser::parseStatement(ParseStatementInfo &Info) {
if (Lexer.is(AsmToken::EndOfStatement)) {
Out.AddBlankLine();
Lex();
@@ -1137,7 +1157,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
int64_t LocalLabelVal = -1;
// A full line comment is a '#' as the first token.
if (Lexer.is(AsmToken::Hash))
- return ParseCppHashLineFilenameComment(IDLoc);
+ return parseCppHashLineFilenameComment(IDLoc);
// Allow an integer followed by a ':' as a directional local label.
if (Lexer.is(AsmToken::Integer)) {
@@ -1168,34 +1188,34 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// have to do this so that .endif isn't skipped in a ".if 0" block for
// example.
StringMap<DirectiveKind>::const_iterator DirKindIt =
- DirectiveKindMap.find(IDVal);
- DirectiveKind DirKind =
- (DirKindIt == DirectiveKindMap.end()) ? DK_NO_DIRECTIVE :
- DirKindIt->getValue();
+ DirectiveKindMap.find(IDVal);
+ DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
+ ? DK_NO_DIRECTIVE
+ : DirKindIt->getValue();
switch (DirKind) {
- default:
- break;
- case DK_IF:
- return ParseDirectiveIf(IDLoc);
- case DK_IFB:
- return ParseDirectiveIfb(IDLoc, true);
- case DK_IFNB:
- return ParseDirectiveIfb(IDLoc, false);
- case DK_IFC:
- return ParseDirectiveIfc(IDLoc, true);
- case DK_IFNC:
- return ParseDirectiveIfc(IDLoc, false);
- case DK_IFDEF:
- return ParseDirectiveIfdef(IDLoc, true);
- case DK_IFNDEF:
- case DK_IFNOTDEF:
- return ParseDirectiveIfdef(IDLoc, false);
- case DK_ELSEIF:
- return ParseDirectiveElseIf(IDLoc);
- case DK_ELSE:
- return ParseDirectiveElse(IDLoc);
- case DK_ENDIF:
- return ParseDirectiveEndIf(IDLoc);
+ default:
+ break;
+ case DK_IF:
+ return parseDirectiveIf(IDLoc);
+ case DK_IFB:
+ return parseDirectiveIfb(IDLoc, true);
+ case DK_IFNB:
+ return parseDirectiveIfb(IDLoc, false);
+ case DK_IFC:
+ return parseDirectiveIfc(IDLoc, true);
+ case DK_IFNC:
+ return parseDirectiveIfc(IDLoc, false);
+ case DK_IFDEF:
+ return parseDirectiveIfdef(IDLoc, true);
+ case DK_IFNDEF:
+ case DK_IFNOTDEF:
+ return parseDirectiveIfdef(IDLoc, false);
+ case DK_ELSEIF:
+ return parseDirectiveElseIf(IDLoc);
+ case DK_ELSE:
+ return parseDirectiveElse(IDLoc);
+ case DK_ENDIF:
+ return parseDirectiveEndIf(IDLoc);
}
// Ignore the statement if in the middle of inactive conditional
@@ -1242,6 +1262,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
IDLoc);
+ getTargetParser().onLabelParsed(Sym);
+
// Consume any end of statement token, if present, to avoid spurious
    // AddBlankLine() calls.
if (Lexer.is(AsmToken::EndOfStatement)) {
@@ -1257,24 +1279,24 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// identifier '=' ... -> assignment statement
Lex();
- return ParseAssignment(IDVal, true);
+ return parseAssignment(IDVal, true);
default: // Normal instruction or directive.
break;
}
// If macros are enabled, check to see if this is a macro instantiation.
- if (MacrosEnabled())
- if (const MCAsmMacro *M = LookupMacro(IDVal)) {
- return HandleMacroEntry(M, IDLoc);
+ if (areMacrosEnabled())
+ if (const MCAsmMacro *M = lookupMacro(IDVal)) {
+ return handleMacroEntry(M, IDLoc);
}
// Otherwise, we have a normal instruction or directive.
-
+
// Directives start with "."
if (IDVal[0] == '.' && IDVal != ".") {
// There are several entities interested in parsing directives:
- //
+ //
// 1. The target-specific assembly parser. Some directives are target
// specific or may potentially behave differently on certain targets.
// 2. Asm parser extensions. For example, platform-specific parsers
@@ -1291,185 +1313,185 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
    // Next, check the extension directive map to see if any extension has
// registered itself to parse this directive.
- std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
- ExtensionDirectiveMap.lookup(IDVal);
+ std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
+ ExtensionDirectiveMap.lookup(IDVal);
if (Handler.first)
return (*Handler.second)(Handler.first, IDVal, IDLoc);
// Finally, if no one else is interested in this directive, it must be
// generic and familiar to this class.
switch (DirKind) {
- default:
- break;
- case DK_SET:
- case DK_EQU:
- return ParseDirectiveSet(IDVal, true);
- case DK_EQUIV:
- return ParseDirectiveSet(IDVal, false);
- case DK_ASCII:
- return ParseDirectiveAscii(IDVal, false);
- case DK_ASCIZ:
- case DK_STRING:
- return ParseDirectiveAscii(IDVal, true);
- case DK_BYTE:
- return ParseDirectiveValue(1);
- case DK_SHORT:
- case DK_VALUE:
- case DK_2BYTE:
- return ParseDirectiveValue(2);
- case DK_LONG:
- case DK_INT:
- case DK_4BYTE:
- return ParseDirectiveValue(4);
- case DK_QUAD:
- case DK_8BYTE:
- return ParseDirectiveValue(8);
- case DK_SINGLE:
- case DK_FLOAT:
- return ParseDirectiveRealValue(APFloat::IEEEsingle);
- case DK_DOUBLE:
- return ParseDirectiveRealValue(APFloat::IEEEdouble);
- case DK_ALIGN: {
- bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes();
- return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1);
- }
- case DK_ALIGN32: {
- bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes();
- return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4);
- }
- case DK_BALIGN:
- return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
- case DK_BALIGNW:
- return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
- case DK_BALIGNL:
- return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
- case DK_P2ALIGN:
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
- case DK_P2ALIGNW:
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
- case DK_P2ALIGNL:
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
- case DK_ORG:
- return ParseDirectiveOrg();
- case DK_FILL:
- return ParseDirectiveFill();
- case DK_ZERO:
- return ParseDirectiveZero();
- case DK_EXTERN:
- eatToEndOfStatement(); // .extern is the default, ignore it.
- return false;
- case DK_GLOBL:
- case DK_GLOBAL:
- return ParseDirectiveSymbolAttribute(MCSA_Global);
- case DK_INDIRECT_SYMBOL:
- return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
- case DK_LAZY_REFERENCE:
- return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
- case DK_NO_DEAD_STRIP:
- return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
- case DK_SYMBOL_RESOLVER:
- return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
- case DK_PRIVATE_EXTERN:
- return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
- case DK_REFERENCE:
- return ParseDirectiveSymbolAttribute(MCSA_Reference);
- case DK_WEAK_DEFINITION:
- return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
- case DK_WEAK_REFERENCE:
- return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
- case DK_WEAK_DEF_CAN_BE_HIDDEN:
- return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
- case DK_COMM:
- case DK_COMMON:
- return ParseDirectiveComm(/*IsLocal=*/false);
- case DK_LCOMM:
- return ParseDirectiveComm(/*IsLocal=*/true);
- case DK_ABORT:
- return ParseDirectiveAbort();
- case DK_INCLUDE:
- return ParseDirectiveInclude();
- case DK_INCBIN:
- return ParseDirectiveIncbin();
- case DK_CODE16:
- case DK_CODE16GCC:
- return TokError(Twine(IDVal) + " not supported yet");
- case DK_REPT:
- return ParseDirectiveRept(IDLoc);
- case DK_IRP:
- return ParseDirectiveIrp(IDLoc);
- case DK_IRPC:
- return ParseDirectiveIrpc(IDLoc);
- case DK_ENDR:
- return ParseDirectiveEndr(IDLoc);
- case DK_BUNDLE_ALIGN_MODE:
- return ParseDirectiveBundleAlignMode();
- case DK_BUNDLE_LOCK:
- return ParseDirectiveBundleLock();
- case DK_BUNDLE_UNLOCK:
- return ParseDirectiveBundleUnlock();
- case DK_SLEB128:
- return ParseDirectiveLEB128(true);
- case DK_ULEB128:
- return ParseDirectiveLEB128(false);
- case DK_SPACE:
- case DK_SKIP:
- return ParseDirectiveSpace(IDVal);
- case DK_FILE:
- return ParseDirectiveFile(IDLoc);
- case DK_LINE:
- return ParseDirectiveLine();
- case DK_LOC:
- return ParseDirectiveLoc();
- case DK_STABS:
- return ParseDirectiveStabs();
- case DK_CFI_SECTIONS:
- return ParseDirectiveCFISections();
- case DK_CFI_STARTPROC:
- return ParseDirectiveCFIStartProc();
- case DK_CFI_ENDPROC:
- return ParseDirectiveCFIEndProc();
- case DK_CFI_DEF_CFA:
- return ParseDirectiveCFIDefCfa(IDLoc);
- case DK_CFI_DEF_CFA_OFFSET:
- return ParseDirectiveCFIDefCfaOffset();
- case DK_CFI_ADJUST_CFA_OFFSET:
- return ParseDirectiveCFIAdjustCfaOffset();
- case DK_CFI_DEF_CFA_REGISTER:
- return ParseDirectiveCFIDefCfaRegister(IDLoc);
- case DK_CFI_OFFSET:
- return ParseDirectiveCFIOffset(IDLoc);
- case DK_CFI_REL_OFFSET:
- return ParseDirectiveCFIRelOffset(IDLoc);
- case DK_CFI_PERSONALITY:
- return ParseDirectiveCFIPersonalityOrLsda(true);
- case DK_CFI_LSDA:
- return ParseDirectiveCFIPersonalityOrLsda(false);
- case DK_CFI_REMEMBER_STATE:
- return ParseDirectiveCFIRememberState();
- case DK_CFI_RESTORE_STATE:
- return ParseDirectiveCFIRestoreState();
- case DK_CFI_SAME_VALUE:
- return ParseDirectiveCFISameValue(IDLoc);
- case DK_CFI_RESTORE:
- return ParseDirectiveCFIRestore(IDLoc);
- case DK_CFI_ESCAPE:
- return ParseDirectiveCFIEscape();
- case DK_CFI_SIGNAL_FRAME:
- return ParseDirectiveCFISignalFrame();
- case DK_CFI_UNDEFINED:
- return ParseDirectiveCFIUndefined(IDLoc);
- case DK_CFI_REGISTER:
- return ParseDirectiveCFIRegister(IDLoc);
- case DK_MACROS_ON:
- case DK_MACROS_OFF:
- return ParseDirectiveMacrosOnOff(IDVal);
- case DK_MACRO:
- return ParseDirectiveMacro(IDLoc);
- case DK_ENDM:
- case DK_ENDMACRO:
- return ParseDirectiveEndMacro(IDVal);
- case DK_PURGEM:
- return ParseDirectivePurgeMacro(IDLoc);
+ default:
+ break;
+ case DK_SET:
+ case DK_EQU:
+ return parseDirectiveSet(IDVal, true);
+ case DK_EQUIV:
+ return parseDirectiveSet(IDVal, false);
+ case DK_ASCII:
+ return parseDirectiveAscii(IDVal, false);
+ case DK_ASCIZ:
+ case DK_STRING:
+ return parseDirectiveAscii(IDVal, true);
+ case DK_BYTE:
+ return parseDirectiveValue(1);
+ case DK_SHORT:
+ case DK_VALUE:
+ case DK_2BYTE:
+ return parseDirectiveValue(2);
+ case DK_LONG:
+ case DK_INT:
+ case DK_4BYTE:
+ return parseDirectiveValue(4);
+ case DK_QUAD:
+ case DK_8BYTE:
+ return parseDirectiveValue(8);
+ case DK_SINGLE:
+ case DK_FLOAT:
+ return parseDirectiveRealValue(APFloat::IEEEsingle);
+ case DK_DOUBLE:
+ return parseDirectiveRealValue(APFloat::IEEEdouble);
+ case DK_ALIGN: {
+ bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes();
+ return parseDirectiveAlign(IsPow2, /*ExprSize=*/1);
+ }
+ case DK_ALIGN32: {
+ bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes();
+ return parseDirectiveAlign(IsPow2, /*ExprSize=*/4);
+ }
+ case DK_BALIGN:
+ return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
+ case DK_BALIGNW:
+ return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
+ case DK_BALIGNL:
+ return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
+ case DK_P2ALIGN:
+ return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
+ case DK_P2ALIGNW:
+ return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
+ case DK_P2ALIGNL:
+ return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
+ case DK_ORG:
+ return parseDirectiveOrg();
+ case DK_FILL:
+ return parseDirectiveFill();
+ case DK_ZERO:
+ return parseDirectiveZero();
+ case DK_EXTERN:
+ eatToEndOfStatement(); // .extern is the default, ignore it.
+ return false;
+ case DK_GLOBL:
+ case DK_GLOBAL:
+ return parseDirectiveSymbolAttribute(MCSA_Global);
+ case DK_LAZY_REFERENCE:
+ return parseDirectiveSymbolAttribute(MCSA_LazyReference);
+ case DK_NO_DEAD_STRIP:
+ return parseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
+ case DK_SYMBOL_RESOLVER:
+ return parseDirectiveSymbolAttribute(MCSA_SymbolResolver);
+ case DK_PRIVATE_EXTERN:
+ return parseDirectiveSymbolAttribute(MCSA_PrivateExtern);
+ case DK_REFERENCE:
+ return parseDirectiveSymbolAttribute(MCSA_Reference);
+ case DK_WEAK_DEFINITION:
+ return parseDirectiveSymbolAttribute(MCSA_WeakDefinition);
+ case DK_WEAK_REFERENCE:
+ return parseDirectiveSymbolAttribute(MCSA_WeakReference);
+ case DK_WEAK_DEF_CAN_BE_HIDDEN:
+ return parseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
+ case DK_COMM:
+ case DK_COMMON:
+ return parseDirectiveComm(/*IsLocal=*/false);
+ case DK_LCOMM:
+ return parseDirectiveComm(/*IsLocal=*/true);
+ case DK_ABORT:
+ return parseDirectiveAbort();
+ case DK_INCLUDE:
+ return parseDirectiveInclude();
+ case DK_INCBIN:
+ return parseDirectiveIncbin();
+ case DK_CODE16:
+ case DK_CODE16GCC:
+ return TokError(Twine(IDVal) + " not supported yet");
+ case DK_REPT:
+ return parseDirectiveRept(IDLoc);
+ case DK_IRP:
+ return parseDirectiveIrp(IDLoc);
+ case DK_IRPC:
+ return parseDirectiveIrpc(IDLoc);
+ case DK_ENDR:
+ return parseDirectiveEndr(IDLoc);
+ case DK_BUNDLE_ALIGN_MODE:
+ return parseDirectiveBundleAlignMode();
+ case DK_BUNDLE_LOCK:
+ return parseDirectiveBundleLock();
+ case DK_BUNDLE_UNLOCK:
+ return parseDirectiveBundleUnlock();
+ case DK_SLEB128:
+ return parseDirectiveLEB128(true);
+ case DK_ULEB128:
+ return parseDirectiveLEB128(false);
+ case DK_SPACE:
+ case DK_SKIP:
+ return parseDirectiveSpace(IDVal);
+ case DK_FILE:
+ return parseDirectiveFile(IDLoc);
+ case DK_LINE:
+ return parseDirectiveLine();
+ case DK_LOC:
+ return parseDirectiveLoc();
+ case DK_STABS:
+ return parseDirectiveStabs();
+ case DK_CFI_SECTIONS:
+ return parseDirectiveCFISections();
+ case DK_CFI_STARTPROC:
+ return parseDirectiveCFIStartProc();
+ case DK_CFI_ENDPROC:
+ return parseDirectiveCFIEndProc();
+ case DK_CFI_DEF_CFA:
+ return parseDirectiveCFIDefCfa(IDLoc);
+ case DK_CFI_DEF_CFA_OFFSET:
+ return parseDirectiveCFIDefCfaOffset();
+ case DK_CFI_ADJUST_CFA_OFFSET:
+ return parseDirectiveCFIAdjustCfaOffset();
+ case DK_CFI_DEF_CFA_REGISTER:
+ return parseDirectiveCFIDefCfaRegister(IDLoc);
+ case DK_CFI_OFFSET:
+ return parseDirectiveCFIOffset(IDLoc);
+ case DK_CFI_REL_OFFSET:
+ return parseDirectiveCFIRelOffset(IDLoc);
+ case DK_CFI_PERSONALITY:
+ return parseDirectiveCFIPersonalityOrLsda(true);
+ case DK_CFI_LSDA:
+ return parseDirectiveCFIPersonalityOrLsda(false);
+ case DK_CFI_REMEMBER_STATE:
+ return parseDirectiveCFIRememberState();
+ case DK_CFI_RESTORE_STATE:
+ return parseDirectiveCFIRestoreState();
+ case DK_CFI_SAME_VALUE:
+ return parseDirectiveCFISameValue(IDLoc);
+ case DK_CFI_RESTORE:
+ return parseDirectiveCFIRestore(IDLoc);
+ case DK_CFI_ESCAPE:
+ return parseDirectiveCFIEscape();
+ case DK_CFI_SIGNAL_FRAME:
+ return parseDirectiveCFISignalFrame();
+ case DK_CFI_UNDEFINED:
+ return parseDirectiveCFIUndefined(IDLoc);
+ case DK_CFI_REGISTER:
+ return parseDirectiveCFIRegister(IDLoc);
+ case DK_CFI_WINDOW_SAVE:
+ return parseDirectiveCFIWindowSave();
+ case DK_MACROS_ON:
+ case DK_MACROS_OFF:
+ return parseDirectiveMacrosOnOff(IDVal);
+ case DK_MACRO:
+ return parseDirectiveMacro(IDLoc);
+ case DK_ENDM:
+ case DK_ENDMACRO:
+ return parseDirectiveEndMacro(IDVal);
+ case DK_PURGEM:
+ return parseDirectivePurgeMacro(IDLoc);
}
return Error(IDLoc, "unknown directive");
@@ -1478,19 +1500,19 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// __asm _emit or __asm __emit
if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
IDVal == "_EMIT" || IDVal == "__EMIT"))
- return ParseDirectiveMSEmit(IDLoc, Info, IDVal.size());
+ return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
// __asm align
if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
- return ParseDirectiveMSAlign(IDLoc, Info);
+ return parseDirectiveMSAlign(IDLoc, Info);
checkForValidSection();
// Canonicalize the opcode to lower case.
std::string OpcodeStr = IDVal.lower();
ParseInstructionInfo IInfo(Info.AsmRewrites);
- bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr,
- IDLoc, Info.ParsedOperands);
+ bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, IDLoc,
+ Info.ParsedOperands);
Info.ParseError = HadError;
// Dump the parsed representation, if requested.
@@ -1505,7 +1527,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
}
OS << "]";
- PrintMessage(IDLoc, SourceMgr::DK_Note, OS.str());
+ printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
}
// If we are generating dwarf for assembly source files and the current
@@ -1513,49 +1535,49 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// the instruction.
if (!HadError && getContext().getGenDwarfForAssembly() &&
getContext().getGenDwarfSection() ==
- getStreamer().getCurrentSection().first) {
+ getStreamer().getCurrentSection().first) {
unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
// If we previously parsed a cpp hash file line comment then make sure the
    // current Dwarf File is for the CppHashFilename; if not, emit the
// Dwarf File table for it and adjust the line number for the .loc.
- const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
- getContext().getMCDwarfFiles();
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
if (CppHashFilename.size() != 0) {
if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() !=
CppHashFilename)
getStreamer().EmitDwarfFileDirective(
- getContext().nextGenDwarfFileNumber(), StringRef(), CppHashFilename);
-
- // Since SrcMgr.FindLineNumber() is slow and messes up the SourceMgr's
- // cache with the different Loc from the call above we save the last
- // info we queried here with SrcMgr.FindLineNumber().
- unsigned CppHashLocLineNo;
- if (LastQueryIDLoc == CppHashLoc && LastQueryBuffer == CppHashBuf)
- CppHashLocLineNo = LastQueryLine;
- else {
- CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc, CppHashBuf);
- LastQueryLine = CppHashLocLineNo;
- LastQueryIDLoc = CppHashLoc;
- LastQueryBuffer = CppHashBuf;
- }
- Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo);
+ getContext().nextGenDwarfFileNumber(), StringRef(),
+ CppHashFilename);
+
+ // Since SrcMgr.FindLineNumber() is slow and messes up the SourceMgr's
+ // cache with the different Loc from the call above we save the last
+ // info we queried here with SrcMgr.FindLineNumber().
+ unsigned CppHashLocLineNo;
+ if (LastQueryIDLoc == CppHashLoc && LastQueryBuffer == CppHashBuf)
+ CppHashLocLineNo = LastQueryLine;
+ else {
+ CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc, CppHashBuf);
+ LastQueryLine = CppHashLocLineNo;
+ LastQueryIDLoc = CppHashLoc;
+ LastQueryBuffer = CppHashBuf;
+ }
+ Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo);
}
- getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(),
- Line, 0, DWARF2_LINE_DEFAULT_IS_STMT ?
- DWARF2_FLAG_IS_STMT : 0, 0, 0,
- StringRef());
+ getStreamer().EmitDwarfLocDirective(
+ getContext().getGenDwarfFileNumber(), Line, 0,
+ DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
+ StringRef());
}
// If parsing succeeded, match the instruction.
if (!HadError) {
unsigned ErrorInfo;
- HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode,
- Info.ParsedOperands,
- Out, ErrorInfo,
- ParsingInlineAsm);
+ HadError = getTargetParser().MatchAndEmitInstruction(
+ IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
+ ParsingInlineAsm);
}
// Don't skip the rest of the line, the instruction parser is responsible for
@@ -1563,25 +1585,25 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
return false;
}
-/// EatToEndOfLine uses the Lexer to eat the characters to the end of the line
+/// eatToEndOfLine uses the Lexer to eat the characters to the end of the line
/// since they may not be able to be tokenized to get to the end of line token.
-void AsmParser::EatToEndOfLine() {
+void AsmParser::eatToEndOfLine() {
if (!Lexer.is(AsmToken::EndOfStatement))
Lexer.LexUntilEndOfLine();
- // Eat EOL.
- Lex();
+ // Eat EOL.
+ Lex();
}
-/// ParseCppHashLineFilenameComment as this:
+/// parseCppHashLineFilenameComment as this:
/// ::= # number "filename"
/// or just as a full line comment if it doesn't have a number and a string.
-bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) {
+bool AsmParser::parseCppHashLineFilenameComment(const SMLoc &L) {
Lex(); // Eat the hash token.
if (getLexer().isNot(AsmToken::Integer)) {
    // Consume the line, since in this case it is not a well-formed line
    // directive; treat it as if it were simply a full-line comment.
- EatToEndOfLine();
+ eatToEndOfLine();
return false;
}
@@ -1589,13 +1611,13 @@ bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) {
Lex();
if (getLexer().isNot(AsmToken::String)) {
- EatToEndOfLine();
+ eatToEndOfLine();
return false;
}
StringRef Filename = getTok().getString();
// Get rid of the enclosing quotes.
- Filename = Filename.substr(1, Filename.size()-2);
+ Filename = Filename.substr(1, Filename.size() - 2);
// Save the SMLoc, Filename and LineNumber for later use by diagnostics.
CppHashLoc = L;
@@ -1604,14 +1626,14 @@ bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) {
CppHashBuf = CurBuffer;
  // Ignore any trailing characters; they're just a comment.
- EatToEndOfLine();
+ eatToEndOfLine();
return false;
}
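
For reference, the comments this routine accepts are the line markers a C preprocessor leaves behind, of the form '# <number> "<filename>"'; anything else is swallowed as an ordinary full-line comment. A rough standalone sketch of that shape (a hypothetical helper over raw text, not the parser's token-based code):

#include <cctype>
#include <cstdlib>
#include <string>

// Returns true and fills LineNo/File for lines like:  # 42 "foo.c"  [junk...]
bool looksLikeLineMarker(const std::string &Line, unsigned &LineNo,
                         std::string &File) {
  size_t i = Line.find_first_not_of(" \t", 1);         // skip '#' and blanks
  if (i == std::string::npos || !isdigit((unsigned char)Line[i]))
    return false;
  LineNo = std::strtoul(Line.c_str() + i, nullptr, 10);
  size_t q1 = Line.find('"', i);
  size_t q2 = (q1 == std::string::npos) ? std::string::npos
                                        : Line.find('"', q1 + 1);
  if (q2 == std::string::npos)
    return false;
  File = Line.substr(q1 + 1, q2 - q1 - 1);
  return true;
}
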
-/// DiagHandler - will use the last parsed cpp hash line filename comment
+/// \brief will use the last parsed cpp hash line filename comment
/// for the Filename and LineNo if any in the diagnostic.
void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
- const AsmParser *Parser = static_cast<const AsmParser*>(Context);
+ const AsmParser *Parser = static_cast<const AsmParser *>(Context);
raw_ostream &OS = errs();
const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
@@ -1619,19 +1641,18 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
int DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
int CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc);
- // Like SourceMgr::PrintMessage() we need to print the include stack if any
+ // Like SourceMgr::printMessage() we need to print the include stack if any
// before printing the message.
int DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
if (!Parser->SavedDiagHandler && DiagCurBuffer > 0) {
- SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
- DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
+ SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
+ DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
}
// If we have not parsed a cpp hash line filename comment or the source
// manager changed or buffer changed (like in a nested include) then just
// print the normal diagnostic using its Filename and LineNo.
- if (!Parser->CppHashLineNumber ||
- &DiagSrcMgr != &Parser->SrcMgr ||
+ if (!Parser->CppHashLineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
DiagBuf != CppHashBuf) {
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
@@ -1643,17 +1664,16 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
// Use the CppHashFilename and calculate a line number based on the
// CppHashLoc and CppHashLineNumber relative to this Diag's SMLoc for
// the diagnostic.
- const std::string Filename = Parser->CppHashFilename;
+ const std::string &Filename = Parser->CppHashFilename;
int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
int CppHashLocLineNo =
Parser->SrcMgr.FindLineNumber(Parser->CppHashLoc, CppHashBuf);
- int LineNo = Parser->CppHashLineNumber - 1 +
- (DiagLocLineNo - CppHashLocLineNo);
+ int LineNo =
+ Parser->CppHashLineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
- SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(),
- Filename, LineNo, Diag.getColumnNo(),
- Diag.getKind(), Diag.getMessage(),
+ SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
+ Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
Diag.getLineContents(), Diag.getRanges());
if (Parser->SavedDiagHandler)
@@ -1673,8 +1693,7 @@ static bool isIdentifierChar(char c) {
bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
const MCAsmMacroParameters &Parameters,
- const MCAsmMacroArguments &A,
- const SMLoc &L) {
+ const MCAsmMacroArguments &A, const SMLoc &L) {
unsigned NParameters = Parameters.size();
if (NParameters != 0 && NParameters != A.size())
return Error(L, "Wrong number of arguments");
@@ -1710,27 +1729,28 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
break;
if (!NParameters) {
- switch (Body[Pos+1]) {
- // $$ => $
+ switch (Body[Pos + 1]) {
+ // $$ => $
case '$':
OS << '$';
break;
- // $n => number of arguments
+ // $n => number of arguments
case 'n':
OS << A.size();
break;
- // $[0-9] => argument
+ // $[0-9] => argument
default: {
// Missing arguments are ignored.
- unsigned Index = Body[Pos+1] - '0';
+ unsigned Index = Body[Pos + 1] - '0';
if (Index >= A.size())
break;
// Otherwise substitute with the token values, with spaces eliminated.
for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
- ie = A[Index].end(); it != ie; ++it)
+ ie = A[Index].end();
+ it != ie; ++it)
OS << it->getString();
break;
}
@@ -1741,23 +1761,24 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
while (isIdentifierChar(Body[I]) && I + 1 != End)
++I;
- const char *Begin = Body.data() + Pos +1;
- StringRef Argument(Begin, I - (Pos +1));
+ const char *Begin = Body.data() + Pos + 1;
+ StringRef Argument(Begin, I - (Pos + 1));
unsigned Index = 0;
for (; Index < NParameters; ++Index)
if (Parameters[Index].first == Argument)
break;
if (Index == NParameters) {
- if (Body[Pos+1] == '(' && Body[Pos+2] == ')')
- Pos += 3;
- else {
- OS << '\\' << Argument;
- Pos = I;
- }
+ if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')')
+ Pos += 3;
+ else {
+ OS << '\\' << Argument;
+ Pos = I;
+ }
} else {
for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
- ie = A[Index].end(); it != ie; ++it)
+ ie = A[Index].end();
+ it != ie; ++it)
if (it->getKind() == AsmToken::String)
OS << it->getStringContents();
else
@@ -1773,48 +1794,43 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
return false;
}
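
For macros defined without named parameters, the body substitution above treats '$$' as a literal '$', '$n' as the argument count, and '$<digit>' as a positional argument, with missing arguments silently dropped. A simplified sketch of that positional pass, using plain strings instead of token lists (illustrative only, not the parser's implementation):

#include <string>
#include <vector>

std::string expandPositional(const std::string &Body,
                             const std::vector<std::string> &Args) {
  std::string Out;
  for (size_t Pos = 0; Pos < Body.size(); ++Pos) {
    if (Body[Pos] != '$' || Pos + 1 == Body.size()) {
      Out += Body[Pos];
      continue;
    }
    char C = Body[Pos + 1];
    if (C == '$') {                      // $$ => literal $
      Out += '$';
    } else if (C == 'n') {               // $n => number of arguments
      Out += std::to_string(Args.size());
    } else if (C >= '0' && C <= '9') {   // $[0-9] => argument, missing ignored
      unsigned Index = C - '0';
      if (Index < Args.size())
        Out += Args[Index];
    } else {                             // anything else: keep the '$'
      Out += '$';
      continue;
    }
    ++Pos;                               // consumed the character after '$'
  }
  return Out;
}

For example, expanding the body ".long $0, $n" with the single argument "42" yields ".long 42, 1".
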
-MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL,
- int EB, SMLoc EL,
- MemoryBuffer *I)
- : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB),
- ExitLoc(EL)
-{
-}
+MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB,
+ SMLoc EL, MemoryBuffer *I)
+ : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB),
+ ExitLoc(EL) {}
-static bool IsOperator(AsmToken::TokenKind kind)
-{
- switch (kind)
- {
- default:
- return false;
- case AsmToken::Plus:
- case AsmToken::Minus:
- case AsmToken::Tilde:
- case AsmToken::Slash:
- case AsmToken::Star:
- case AsmToken::Dot:
- case AsmToken::Equal:
- case AsmToken::EqualEqual:
- case AsmToken::Pipe:
- case AsmToken::PipePipe:
- case AsmToken::Caret:
- case AsmToken::Amp:
- case AsmToken::AmpAmp:
- case AsmToken::Exclaim:
- case AsmToken::ExclaimEqual:
- case AsmToken::Percent:
- case AsmToken::Less:
- case AsmToken::LessEqual:
- case AsmToken::LessLess:
- case AsmToken::LessGreater:
- case AsmToken::Greater:
- case AsmToken::GreaterEqual:
- case AsmToken::GreaterGreater:
- return true;
+static bool isOperator(AsmToken::TokenKind kind) {
+ switch (kind) {
+ default:
+ return false;
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Tilde:
+ case AsmToken::Slash:
+ case AsmToken::Star:
+ case AsmToken::Dot:
+ case AsmToken::Equal:
+ case AsmToken::EqualEqual:
+ case AsmToken::Pipe:
+ case AsmToken::PipePipe:
+ case AsmToken::Caret:
+ case AsmToken::Amp:
+ case AsmToken::AmpAmp:
+ case AsmToken::Exclaim:
+ case AsmToken::ExclaimEqual:
+ case AsmToken::Percent:
+ case AsmToken::Less:
+ case AsmToken::LessEqual:
+ case AsmToken::LessLess:
+ case AsmToken::LessGreater:
+ case AsmToken::Greater:
+ case AsmToken::GreaterEqual:
+ case AsmToken::GreaterGreater:
+ return true;
}
}
-bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA,
+bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA,
AsmToken::TokenKind &ArgumentDelimiter) {
unsigned ParenLevel = 0;
unsigned AddTokens = 0;
@@ -1848,7 +1864,7 @@ bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA,
// one into this argument
if (ArgumentDelimiter == AsmToken::Space ||
ArgumentDelimiter == AsmToken::Eof) {
- if (IsOperator(Lexer.getKind())) {
+ if (isOperator(Lexer.getKind())) {
// Check to see whether the token is used as an operator,
// or part of an identifier
const char *NextChar = getTok().getEndLoc().getPointer();
@@ -1858,14 +1874,14 @@ bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA,
if (!AddTokens && ParenLevel == 0) {
if (ArgumentDelimiter == AsmToken::Eof &&
- !IsOperator(Lexer.getKind()))
+ !isOperator(Lexer.getKind()))
ArgumentDelimiter = AsmToken::Space;
break;
}
}
}
- // HandleMacroEntry relies on not advancing the lexer here
+ // handleMacroEntry relies on not advancing the lexer here
// to be able to fill in the remaining default parameter values
if (Lexer.is(AsmToken::EndOfStatement))
break;
@@ -1890,10 +1906,11 @@ bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA,
}
// Parse the macro instantiation arguments.
-bool AsmParser::ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A) {
+bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
+ MCAsmMacroArguments &A) {
const unsigned NParameters = M ? M->Parameters.size() : 0;
// Argument delimiter is initially unknown. It will be set by
- // ParseMacroArgument()
+ // parseMacroArgument()
AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
// Parse two kinds of macro invocations:
@@ -1903,7 +1920,7 @@ bool AsmParser::ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A)
++Parameter) {
MCAsmMacroArgument MA;
- if (ParseMacroArgument(MA, ArgumentDelimiter))
+ if (parseMacroArgument(MA, ArgumentDelimiter))
return true;
if (!MA.empty() || !NParameters)
@@ -1934,31 +1951,31 @@ bool AsmParser::ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A)
return TokError("Too many arguments");
}
-const MCAsmMacro* AsmParser::LookupMacro(StringRef Name) {
- StringMap<MCAsmMacro*>::iterator I = MacroMap.find(Name);
+const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) {
+ StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name);
return (I == MacroMap.end()) ? NULL : I->getValue();
}
-void AsmParser::DefineMacro(StringRef Name, const MCAsmMacro& Macro) {
+void AsmParser::defineMacro(StringRef Name, const MCAsmMacro &Macro) {
MacroMap[Name] = new MCAsmMacro(Macro);
}
-void AsmParser::UndefineMacro(StringRef Name) {
- StringMap<MCAsmMacro*>::iterator I = MacroMap.find(Name);
+void AsmParser::undefineMacro(StringRef Name) {
+ StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name);
if (I != MacroMap.end()) {
delete I->getValue();
MacroMap.erase(I);
}
}
-bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
+bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
// this, although we should protect against infinite loops.
if (ActiveMacros.size() == 20)
return TokError("macros cannot be nested more than 20 levels deep");
MCAsmMacroArguments A;
- if (ParseMacroArguments(M, A))
+ if (parseMacroArguments(M, A))
return true;
// Remove any trailing empty arguments. Do this after-the-fact as we have
@@ -1981,14 +1998,12 @@ bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
OS << ".endmacro\n";
MemoryBuffer *Instantiation =
- MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
+ MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
// Create the macro instantiation object and add to the current macro
// instantiation stack.
- MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
- CurBuffer,
- getTok().getLoc(),
- Instantiation);
+ MacroInstantiation *MI = new MacroInstantiation(
+ M, NameLoc, CurBuffer, getTok().getLoc(), Instantiation);
ActiveMacros.push_back(MI);
// Jump to the macro instantiation and prime the lexer.
@@ -1999,9 +2014,9 @@ bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
return false;
}
-void AsmParser::HandleMacroExit() {
+void AsmParser::handleMacroExit() {
// Jump to the EndOfStatement we should return to, and consume it.
- JumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer);
+ jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer);
Lex();
// Pop the instantiation entry.
@@ -2009,29 +2024,30 @@ void AsmParser::HandleMacroExit() {
ActiveMacros.pop_back();
}
-static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
+static bool isUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
switch (Value->getKind()) {
case MCExpr::Binary: {
- const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value);
- return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS());
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Value);
+ return isUsedIn(Sym, BE->getLHS()) || isUsedIn(Sym, BE->getRHS());
}
case MCExpr::Target:
case MCExpr::Constant:
return false;
case MCExpr::SymbolRef: {
- const MCSymbol &S = static_cast<const MCSymbolRefExpr*>(Value)->getSymbol();
+ const MCSymbol &S =
+ static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
if (S.isVariable())
- return IsUsedIn(Sym, S.getVariableValue());
+ return isUsedIn(Sym, S.getVariableValue());
return &S == Sym;
}
case MCExpr::Unary:
- return IsUsedIn(Sym, static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
+ return isUsedIn(Sym, static_cast<const MCUnaryExpr *>(Value)->getSubExpr());
}
llvm_unreachable("Unknown expr kind!");
}
-bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef,
+bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
bool NoDeadStrip) {
// FIXME: Use better location, we should use proper tokens.
SMLoc EqualLoc = Lexer.getLoc();
@@ -2064,7 +2080,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef,
//
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
- if (IsUsedIn(Sym, Value))
+ if (isUsedIn(Sym, Value))
return Error(EqualLoc, "Recursive use of '" + Name + "'");
else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
; // Allow redefinitions of undefined symbols only used in directives.
@@ -2076,7 +2092,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef,
return Error(EqualLoc, "invalid assignment to '" + Name + "'");
else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
- Name + "'");
+ Name + "'");
// Don't count these checks as uses.
Sym->setUsed(false);
@@ -2090,7 +2106,6 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef,
if (NoDeadStrip)
Out.EmitSymbolAttribute(Sym, MCSA_NoDeadStrip);
-
return false;
}
@@ -2099,31 +2114,30 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef,
/// ::= string
bool AsmParser::parseIdentifier(StringRef &Res) {
// The assembler has relaxed rules for accepting identifiers, in particular we
- // allow things like '.globl $foo', which would normally be separate
- // tokens. At this level, we have already lexed so we cannot (currently)
+ // allow things like '.globl $foo' and '.def @feat.00', which would normally be
+ // separate tokens. At this level, we have already lexed so we cannot (currently)
// handle this as a context dependent token, instead we detect adjacent tokens
// and return the combined identifier.
- if (Lexer.is(AsmToken::Dollar)) {
- SMLoc DollarLoc = getLexer().getLoc();
+ if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
+ SMLoc PrefixLoc = getLexer().getLoc();
- // Consume the dollar sign, and check for a following identifier.
+ // Consume the prefix character, and check for a following identifier.
Lex();
if (Lexer.isNot(AsmToken::Identifier))
return true;
- // We have a '$' followed by an identifier, make sure they are adjacent.
- if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
+ if (PrefixLoc.getPointer() + 1 != getTok().getLoc().getPointer())
return true;
// Construct the joined identifier and consume the token.
- Res = StringRef(DollarLoc.getPointer(),
- getTok().getIdentifier().size() + 1);
+ Res =
+ StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
Lex();
return false;
}
- if (Lexer.isNot(AsmToken::Identifier) &&
- Lexer.isNot(AsmToken::String))
+ if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
return true;
Res = getTok().getIdentifier();
@@ -2133,11 +2147,11 @@ bool AsmParser::parseIdentifier(StringRef &Res) {
return false;
}
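
The prefix handling above fuses a leading '$' or '@' with an immediately following identifier (so '.def @feat.00' parses as one name) but refuses when the two tokens are not adjacent. A tiny sketch of that adjacency rule over raw pointers (a hypothetical helper, not the parser's code):

#include <cstddef>
#include <string>

// PrefixPtr points at the '$' or '@'; IdentPtr/IdentLen describe the next
// identifier token. Fuse them only when nothing separates the two tokens.
bool fusePrefixedIdentifier(const char *PrefixPtr, const char *IdentPtr,
                            size_t IdentLen, std::string &Res) {
  if (PrefixPtr + 1 != IdentPtr)
    return false;                       // e.g. ".globl $ foo" stays split
  Res.assign(PrefixPtr, IdentLen + 1);  // "$foo", "@feat.00"
  return true;
}
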
-/// ParseDirectiveSet:
+/// parseDirectiveSet:
/// ::= .equ identifier ',' expression
/// ::= .equiv identifier ',' expression
/// ::= .set identifier ',' expression
-bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
+bool AsmParser::parseDirectiveSet(StringRef IDVal, bool allow_redef) {
StringRef Name;
if (parseIdentifier(Name))
@@ -2147,7 +2161,7 @@ bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
return TokError("unexpected token in '" + Twine(IDVal) + "'");
Lex();
- return ParseAssignment(Name, allow_redef, true);
+ return parseAssignment(Name, allow_redef, true);
}
bool AsmParser::parseEscapedString(std::string &Data) {
@@ -2168,15 +2182,15 @@ bool AsmParser::parseEscapedString(std::string &Data) {
return TokError("unexpected backslash at end of string");
// Recognize octal sequences.
- if ((unsigned) (Str[i] - '0') <= 7) {
+ if ((unsigned)(Str[i] - '0') <= 7) {
// Consume up to three octal characters.
unsigned Value = Str[i] - '0';
- if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ if (i + 1 != e && ((unsigned)(Str[i + 1] - '0')) <= 7) {
++i;
Value = Value * 8 + (Str[i] - '0');
- if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+ if (i + 1 != e && ((unsigned)(Str[i + 1] - '0')) <= 7) {
++i;
Value = Value * 8 + (Str[i] - '0');
}
@@ -2185,7 +2199,7 @@ bool AsmParser::parseEscapedString(std::string &Data) {
if (Value > 255)
return TokError("invalid octal escape sequence (out of range)");
- Data += (unsigned char) Value;
+ Data += (unsigned char)Value;
continue;
}
@@ -2208,9 +2222,9 @@ bool AsmParser::parseEscapedString(std::string &Data) {
return false;
}
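
The escape handling above consumes at most three octal digits after a backslash and rejects anything above 255. A small sketch of just that piece (a hypothetical helper; the real routine also handles the usual single-character escapes and quoted characters):

#include <cstddef>
#include <string>

// Str[i] is the first octal digit after the backslash; i is advanced past any
// extra digits consumed. Returns -1 for values outside the byte range.
int decodeOctalEscape(const std::string &Str, size_t &i) {
  unsigned Value = Str[i] - '0';
  for (int Digits = 1; Digits < 3 && i + 1 < Str.size() &&
                       (unsigned)(Str[i + 1] - '0') <= 7;
       ++Digits) {
    ++i;
    Value = Value * 8 + (Str[i] - '0');
  }
  return Value > 255 ? -1 : (int)Value;
}

So "\101" decodes to 65 ('A'), while "\400" is rejected, matching the out-of-range error above.
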
-/// ParseDirectiveAscii:
+/// parseDirectiveAscii:
/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
-bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
+bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
checkForValidSection();
@@ -2241,9 +2255,9 @@ bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
return false;
}
-/// ParseDirectiveValue
+/// parseDirectiveValue
/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
-bool AsmParser::ParseDirectiveValue(unsigned Size) {
+bool AsmParser::parseDirectiveValue(unsigned Size) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
checkForValidSection();
@@ -2277,9 +2291,9 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
return false;
}
-/// ParseDirectiveRealValue
+/// parseDirectiveRealValue
/// ::= (.single | .double) [ expression (, expression)* ]
-bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
+bool AsmParser::parseDirectiveRealValue(const fltSemantics &Semantics) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
checkForValidSection();
@@ -2309,7 +2323,7 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
else
return TokError("invalid floating point literal");
} else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
- APFloat::opInvalidOp)
+ APFloat::opInvalidOp)
return TokError("invalid floating point literal");
if (IsNeg)
Value.changeSign();
@@ -2335,9 +2349,9 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
return false;
}
-/// ParseDirectiveZero
+/// parseDirectiveZero
/// ::= .zero expression
-bool AsmParser::ParseDirectiveZero() {
+bool AsmParser::parseDirectiveZero() {
checkForValidSection();
int64_t NumBytes;
@@ -2361,35 +2375,40 @@ bool AsmParser::ParseDirectiveZero() {
return false;
}
-/// ParseDirectiveFill
-/// ::= .fill expression , expression , expression
-bool AsmParser::ParseDirectiveFill() {
+/// parseDirectiveFill
+/// ::= .fill expression [ , expression [ , expression ] ]
+bool AsmParser::parseDirectiveFill() {
checkForValidSection();
int64_t NumValues;
if (parseAbsoluteExpression(NumValues))
return true;
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '.fill' directive");
- Lex();
+ int64_t FillSize = 1;
+ int64_t FillExpr = 0;
- int64_t FillSize;
- if (parseAbsoluteExpression(FillSize))
- return true;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.fill' directive");
+ Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '.fill' directive");
- Lex();
+ if (parseAbsoluteExpression(FillSize))
+ return true;
- int64_t FillExpr;
- if (parseAbsoluteExpression(FillExpr))
- return true;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.fill' directive");
+ Lex();
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.fill' directive");
+ if (parseAbsoluteExpression(FillExpr))
+ return true;
- Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.fill' directive");
+
+ Lex();
+ }
+ }
if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
@@ -2400,9 +2419,9 @@ bool AsmParser::ParseDirectiveFill() {
return false;
}
-/// ParseDirectiveOrg
+/// parseDirectiveOrg
/// ::= .org expression [ , expression ]
-bool AsmParser::ParseDirectiveOrg() {
+bool AsmParser::parseDirectiveOrg() {
checkForValidSection();
const MCExpr *Offset;
@@ -2435,9 +2454,9 @@ bool AsmParser::ParseDirectiveOrg() {
return false;
}
-/// ParseDirectiveAlign
+/// parseDirectiveAlign
/// ::= {.align, ...} expression [ , expression [ , expression ]]
-bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
+bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
checkForValidSection();
SMLoc AlignmentLoc = getLexer().getLoc();
@@ -2501,13 +2520,13 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
if (MaxBytesLoc.isValid()) {
if (MaxBytesToFill < 1) {
Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
- "many bytes, ignoring maximum bytes expression");
+ "many bytes, ignoring maximum bytes expression");
MaxBytesToFill = 0;
}
if (MaxBytesToFill >= Alignment) {
Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and "
- "has no effect");
+ "has no effect");
MaxBytesToFill = 0;
}
}
@@ -2527,10 +2546,10 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
return false;
}
-/// ParseDirectiveFile
+/// parseDirectiveFile
/// ::= .file [number] filename
/// ::= .file number directory filename
-bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
// FIXME: I'm not sure what this is.
int64_t FileNumber = -1;
SMLoc FileNumberLoc = getLexer().getLoc();
@@ -2546,17 +2565,21 @@ bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) {
return TokError("unexpected token in '.file' directive");
// Usually the directory and filename together, otherwise just the directory.
- StringRef Path = getTok().getString();
- Path = Path.substr(1, Path.size()-2);
+ // Allow the strings to have escaped octal character sequence.
+ std::string Path = getTok().getString();
+ if (parseEscapedString(Path))
+ return true;
Lex();
StringRef Directory;
StringRef Filename;
+ std::string FilenameData;
if (getLexer().is(AsmToken::String)) {
if (FileNumber == -1)
return TokError("explicit path specified, but no file number");
- Filename = getTok().getString();
- Filename = Filename.substr(1, Filename.size()-2);
+ if (parseEscapedString(FilenameData))
+ return true;
+ Filename = FilenameData;
Directory = Path;
Lex();
} else {
@@ -2570,8 +2593,9 @@ bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) {
getStreamer().EmitFileDirective(Filename);
else {
if (getContext().getGenDwarfForAssembly() == true)
- Error(DirectiveLoc, "input can't have .file dwarf directives when -g is "
- "used to generate dwarf debug info for assembly code");
+ Error(DirectiveLoc,
+ "input can't have .file dwarf directives when -g is "
+ "used to generate dwarf debug info for assembly code");
if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename))
Error(FileNumberLoc, "file number already allocated");
@@ -2580,15 +2604,15 @@ bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) {
return false;
}
-/// ParseDirectiveLine
+/// parseDirectiveLine
/// ::= .line [number]
-bool AsmParser::ParseDirectiveLine() {
+bool AsmParser::parseDirectiveLine() {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.line' directive");
int64_t LineNumber = getTok().getIntVal();
- (void) LineNumber;
+ (void)LineNumber;
Lex();
// FIXME: Do something with the .line.
@@ -2600,14 +2624,14 @@ bool AsmParser::ParseDirectiveLine() {
return false;
}
-/// ParseDirectiveLoc
+/// parseDirectiveLoc
/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
 /// The first number is a file number, which must have been previously assigned
 /// with a .file directive; the second number is the line number, and optionally
 /// the third number is a column position (zero if not specified). The remaining
/// optional items are .loc sub-directives.
-bool AsmParser::ParseDirectiveLoc() {
+bool AsmParser::parseDirectiveLoc() {
if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.loc' directive");
int64_t FileNumber = getTok().getIntVal();
@@ -2620,8 +2644,8 @@ bool AsmParser::ParseDirectiveLoc() {
int64_t LineNumber = 0;
if (getLexer().is(AsmToken::Integer)) {
LineNumber = getTok().getIntVal();
- if (LineNumber < 1)
- return TokError("line number less than one in '.loc' directive");
+ if (LineNumber < 0)
+ return TokError("line number less than zero in '.loc' directive");
Lex();
}
@@ -2701,15 +2725,15 @@ bool AsmParser::ParseDirectiveLoc() {
return false;
}
-/// ParseDirectiveStabs
+/// parseDirectiveStabs
/// ::= .stabs string, number, number, number
-bool AsmParser::ParseDirectiveStabs() {
+bool AsmParser::parseDirectiveStabs() {
return TokError("unsupported directive '.stabs'");
}
-/// ParseDirectiveCFISections
+/// parseDirectiveCFISections
/// ::= .cfi_sections section [, section]
-bool AsmParser::ParseDirectiveCFISections() {
+bool AsmParser::parseDirectiveCFISections() {
StringRef Name;
bool EH = false;
bool Debug = false;
@@ -2738,22 +2762,22 @@ bool AsmParser::ParseDirectiveCFISections() {
return false;
}
-/// ParseDirectiveCFIStartProc
+/// parseDirectiveCFIStartProc
/// ::= .cfi_startproc
-bool AsmParser::ParseDirectiveCFIStartProc() {
+bool AsmParser::parseDirectiveCFIStartProc() {
getStreamer().EmitCFIStartProc();
return false;
}
-/// ParseDirectiveCFIEndProc
+/// parseDirectiveCFIEndProc
/// ::= .cfi_endproc
-bool AsmParser::ParseDirectiveCFIEndProc() {
+bool AsmParser::parseDirectiveCFIEndProc() {
getStreamer().EmitCFIEndProc();
return false;
}
-/// ParseRegisterOrRegisterNumber - parse register name or number.
-bool AsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
+/// \brief parse register name or number.
+bool AsmParser::parseRegisterOrRegisterNumber(int64_t &Register,
SMLoc DirectiveLoc) {
unsigned RegNo;
@@ -2767,11 +2791,11 @@ bool AsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
return false;
}
-/// ParseDirectiveCFIDefCfa
+/// parseDirectiveCFIDefCfa
/// ::= .cfi_def_cfa register, offset
-bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
int64_t Register = 0;
- if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -2786,9 +2810,9 @@ bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
return false;
}
-/// ParseDirectiveCFIDefCfaOffset
+/// parseDirectiveCFIDefCfaOffset
/// ::= .cfi_def_cfa_offset offset
-bool AsmParser::ParseDirectiveCFIDefCfaOffset() {
+bool AsmParser::parseDirectiveCFIDefCfaOffset() {
int64_t Offset = 0;
if (parseAbsoluteExpression(Offset))
return true;
@@ -2797,11 +2821,11 @@ bool AsmParser::ParseDirectiveCFIDefCfaOffset() {
return false;
}
-/// ParseDirectiveCFIRegister
+/// parseDirectiveCFIRegister
/// ::= .cfi_register register, register
-bool AsmParser::ParseDirectiveCFIRegister(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
int64_t Register1 = 0;
- if (ParseRegisterOrRegisterNumber(Register1, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -2809,16 +2833,23 @@ bool AsmParser::ParseDirectiveCFIRegister(SMLoc DirectiveLoc) {
Lex();
int64_t Register2 = 0;
- if (ParseRegisterOrRegisterNumber(Register2, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
return true;
getStreamer().EmitCFIRegister(Register1, Register2);
return false;
}
-/// ParseDirectiveCFIAdjustCfaOffset
+/// parseDirectiveCFIWindowSave
+/// ::= .cfi_window_save
+bool AsmParser::parseDirectiveCFIWindowSave() {
+ getStreamer().EmitCFIWindowSave();
+ return false;
+}
+
+/// parseDirectiveCFIAdjustCfaOffset
/// ::= .cfi_adjust_cfa_offset adjustment
-bool AsmParser::ParseDirectiveCFIAdjustCfaOffset() {
+bool AsmParser::parseDirectiveCFIAdjustCfaOffset() {
int64_t Adjustment = 0;
if (parseAbsoluteExpression(Adjustment))
return true;
@@ -2827,24 +2858,24 @@ bool AsmParser::ParseDirectiveCFIAdjustCfaOffset() {
return false;
}
-/// ParseDirectiveCFIDefCfaRegister
+/// parseDirectiveCFIDefCfaRegister
/// ::= .cfi_def_cfa_register register
-bool AsmParser::ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
int64_t Register = 0;
- if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFIDefCfaRegister(Register);
return false;
}
-/// ParseDirectiveCFIOffset
+/// parseDirectiveCFIOffset
/// ::= .cfi_offset register, offset
-bool AsmParser::ParseDirectiveCFIOffset(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
int64_t Register = 0;
int64_t Offset = 0;
- if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -2858,12 +2889,12 @@ bool AsmParser::ParseDirectiveCFIOffset(SMLoc DirectiveLoc) {
return false;
}
-/// ParseDirectiveCFIRelOffset
+/// parseDirectiveCFIRelOffset
/// ::= .cfi_rel_offset register, offset
-bool AsmParser::ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
int64_t Register = 0;
- if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -2900,11 +2931,11 @@ static bool isValidEncoding(int64_t Encoding) {
return true;
}
-/// ParseDirectiveCFIPersonalityOrLsda
+/// parseDirectiveCFIPersonalityOrLsda
/// IsPersonality true for cfi_personality, false for cfi_lsda
/// ::= .cfi_personality encoding, [symbol_name]
/// ::= .cfi_lsda encoding, [symbol_name]
-bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
+bool AsmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
int64_t Encoding = 0;
if (parseAbsoluteExpression(Encoding))
return true;
@@ -2931,46 +2962,46 @@ bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
return false;
}
-/// ParseDirectiveCFIRememberState
+/// parseDirectiveCFIRememberState
/// ::= .cfi_remember_state
-bool AsmParser::ParseDirectiveCFIRememberState() {
+bool AsmParser::parseDirectiveCFIRememberState() {
getStreamer().EmitCFIRememberState();
return false;
}
-/// ParseDirectiveCFIRestoreState
+/// parseDirectiveCFIRestoreState
 /// ::= .cfi_restore_state
-bool AsmParser::ParseDirectiveCFIRestoreState() {
+bool AsmParser::parseDirectiveCFIRestoreState() {
getStreamer().EmitCFIRestoreState();
return false;
}
-/// ParseDirectiveCFISameValue
+/// parseDirectiveCFISameValue
/// ::= .cfi_same_value register
-bool AsmParser::ParseDirectiveCFISameValue(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
int64_t Register = 0;
- if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFISameValue(Register);
return false;
}
-/// ParseDirectiveCFIRestore
+/// parseDirectiveCFIRestore
/// ::= .cfi_restore register
-bool AsmParser::ParseDirectiveCFIRestore(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
int64_t Register = 0;
- if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFIRestore(Register);
return false;
}
-/// ParseDirectiveCFIEscape
+/// parseDirectiveCFIEscape
/// ::= .cfi_escape expression[,...]
-bool AsmParser::ParseDirectiveCFIEscape() {
+bool AsmParser::parseDirectiveCFIEscape() {
std::string Values;
int64_t CurrValue;
if (parseAbsoluteExpression(CurrValue))
@@ -2991,9 +3022,9 @@ bool AsmParser::ParseDirectiveCFIEscape() {
return false;
}
-/// ParseDirectiveCFISignalFrame
+/// parseDirectiveCFISignalFrame
/// ::= .cfi_signal_frame
-bool AsmParser::ParseDirectiveCFISignalFrame() {
+bool AsmParser::parseDirectiveCFISignalFrame() {
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
"unexpected token in '.cfi_signal_frame'");
@@ -3002,40 +3033,40 @@ bool AsmParser::ParseDirectiveCFISignalFrame() {
return false;
}
-/// ParseDirectiveCFIUndefined
+/// parseDirectiveCFIUndefined
/// ::= .cfi_undefined register
-bool AsmParser::ParseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
int64_t Register = 0;
- if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFIUndefined(Register);
return false;
}
-/// ParseDirectiveMacrosOnOff
+/// parseDirectiveMacrosOnOff
/// ::= .macros_on
/// ::= .macros_off
-bool AsmParser::ParseDirectiveMacrosOnOff(StringRef Directive) {
+bool AsmParser::parseDirectiveMacrosOnOff(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
"unexpected token in '" + Directive + "' directive");
- SetMacrosEnabled(Directive == ".macros_on");
+ setMacrosEnabled(Directive == ".macros_on");
return false;
}
-/// ParseDirectiveMacro
+/// parseDirectiveMacro
/// ::= .macro name [parameters]
-bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
StringRef Name;
if (parseIdentifier(Name))
return TokError("expected identifier in '.macro' directive");
MCAsmMacroParameters Parameters;
// Argument delimiter is initially unknown. It will be set by
- // ParseMacroArgument()
+ // parseMacroArgument()
AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
@@ -3045,7 +3076,7 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) {
if (getLexer().is(AsmToken::Equal)) {
Lex();
- if (ParseMacroArgument(Parameter.second, ArgumentDelimiter))
+ if (parseMacroArgument(Parameter.second, ArgumentDelimiter))
return true;
}
@@ -3085,19 +3116,19 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) {
eatToEndOfStatement();
}
- if (LookupMacro(Name)) {
+ if (lookupMacro(Name)) {
return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
}
const char *BodyStart = StartToken.getLoc().getPointer();
const char *BodyEnd = EndToken.getLoc().getPointer();
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
- CheckForBadMacro(DirectiveLoc, Name, Body, Parameters);
- DefineMacro(Name, MCAsmMacro(Name, Body, Parameters));
+ checkForBadMacro(DirectiveLoc, Name, Body, Parameters);
+ defineMacro(Name, MCAsmMacro(Name, Body, Parameters));
return false;
}
-/// CheckForBadMacro
+/// checkForBadMacro
///
/// With the support added for named parameters there may be code out there that
/// is transitioning from positional parameters. In versions of gas that did
@@ -3111,7 +3142,7 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) {
/// intended or change the macro to use the named parameters. It is possible
/// this warning will trigger when the none of the named parameters are used
/// and the strings like $1 are infact to simply to be passed trough unchanged.
-void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name,
+void AsmParser::checkForBadMacro(SMLoc DirectiveLoc, StringRef Name,
StringRef Body,
MCAsmMacroParameters Parameters) {
// If this macro is not defined with named parameters the warning we are
@@ -3149,21 +3180,21 @@ void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name,
break;
if (Body[Pos] == '$') {
- switch (Body[Pos+1]) {
- // $$ => $
+ switch (Body[Pos + 1]) {
+ // $$ => $
case '$':
break;
- // $n => number of arguments
+ // $n => number of arguments
case 'n':
PositionalParametersFound = true;
break;
- // $[0-9] => argument
+ // $[0-9] => argument
default: {
PositionalParametersFound = true;
break;
- }
+ }
}
Pos += 2;
} else {
@@ -3171,19 +3202,19 @@ void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name,
while (isIdentifierChar(Body[I]) && I + 1 != End)
++I;
- const char *Begin = Body.data() + Pos +1;
- StringRef Argument(Begin, I - (Pos +1));
+ const char *Begin = Body.data() + Pos + 1;
+ StringRef Argument(Begin, I - (Pos + 1));
unsigned Index = 0;
for (; Index < NParameters; ++Index)
if (Parameters[Index].first == Argument)
break;
if (Index == NParameters) {
- if (Body[Pos+1] == '(' && Body[Pos+2] == ')')
- Pos += 3;
- else {
- Pos = I;
- }
+ if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')')
+ Pos += 3;
+ else {
+ Pos = I;
+ }
} else {
NamedParametersFound = true;
Pos += 1 + Argument.size();
@@ -3199,29 +3230,29 @@ void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name,
"found in body which will have no effect");
}
-/// ParseDirectiveEndMacro
+/// parseDirectiveEndMacro
/// ::= .endm
/// ::= .endmacro
-bool AsmParser::ParseDirectiveEndMacro(StringRef Directive) {
+bool AsmParser::parseDirectiveEndMacro(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + Directive + "' directive");
// If we are inside a macro instantiation, terminate the current
// instantiation.
- if (InsideMacroInstantiation()) {
- HandleMacroExit();
+ if (isInsideMacroInstantiation()) {
+ handleMacroExit();
return false;
}
// Otherwise, this .endmacro is a stray entry in the file; well formed
// .endmacro directives are handled during the macro definition parsing.
return TokError("unexpected '" + Directive + "' in file, "
- "no current macro definition");
+ "no current macro definition");
}
-/// ParseDirectivePurgeMacro
+/// parseDirectivePurgeMacro
/// ::= .purgem
-bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
StringRef Name;
if (parseIdentifier(Name))
return TokError("expected identifier in '.purgem' directive");
@@ -3229,16 +3260,16 @@ bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.purgem' directive");
- if (!LookupMacro(Name))
+ if (!lookupMacro(Name))
return Error(DirectiveLoc, "macro '" + Name + "' is not defined");
- UndefineMacro(Name);
+ undefineMacro(Name);
return false;
}
-/// ParseDirectiveBundleAlignMode
+/// parseDirectiveBundleAlignMode
/// ::= {.bundle_align_mode} expression
-bool AsmParser::ParseDirectiveBundleAlignMode() {
+bool AsmParser::parseDirectiveBundleAlignMode() {
checkForValidSection();
// Expect a single argument: an expression that evaluates to a constant
@@ -3262,9 +3293,9 @@ bool AsmParser::ParseDirectiveBundleAlignMode() {
return false;
}
-/// ParseDirectiveBundleLock
+/// parseDirectiveBundleLock
/// ::= {.bundle_lock} [align_to_end]
-bool AsmParser::ParseDirectiveBundleLock() {
+bool AsmParser::parseDirectiveBundleLock() {
checkForValidSection();
bool AlignToEnd = false;
@@ -3272,7 +3303,7 @@ bool AsmParser::ParseDirectiveBundleLock() {
StringRef Option;
SMLoc Loc = getTok().getLoc();
const char *kInvalidOptionError =
- "invalid option for '.bundle_lock' directive";
+ "invalid option for '.bundle_lock' directive";
if (parseIdentifier(Option))
return Error(Loc, kInvalidOptionError);
@@ -3291,9 +3322,9 @@ bool AsmParser::ParseDirectiveBundleLock() {
return false;
}
-/// ParseDirectiveBundleLock
+/// parseDirectiveBundleUnlock
 /// ::= {.bundle_unlock}
-bool AsmParser::ParseDirectiveBundleUnlock() {
+bool AsmParser::parseDirectiveBundleUnlock() {
checkForValidSection();
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -3304,9 +3335,9 @@ bool AsmParser::ParseDirectiveBundleUnlock() {
return false;
}
-/// ParseDirectiveSpace
+/// parseDirectiveSpace
/// ::= (.skip | .space) expression [ , expression ]
-bool AsmParser::ParseDirectiveSpace(StringRef IDVal) {
+bool AsmParser::parseDirectiveSpace(StringRef IDVal) {
checkForValidSection();
int64_t NumBytes;
@@ -3329,8 +3360,8 @@ bool AsmParser::ParseDirectiveSpace(StringRef IDVal) {
Lex();
if (NumBytes <= 0)
- return TokError("invalid number of bytes in '" +
- Twine(IDVal) + "' directive");
+ return TokError("invalid number of bytes in '" + Twine(IDVal) +
+ "' directive");
// FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
getStreamer().EmitFill(NumBytes, FillExpr);
@@ -3338,9 +3369,9 @@ bool AsmParser::ParseDirectiveSpace(StringRef IDVal) {
return false;
}
-/// ParseDirectiveLEB128
+/// parseDirectiveLEB128
/// ::= (.sleb128 | .uleb128) expression
-bool AsmParser::ParseDirectiveLEB128(bool Signed) {
+bool AsmParser::parseDirectiveLEB128(bool Signed) {
checkForValidSection();
const MCExpr *Value;
@@ -3358,9 +3389,9 @@ bool AsmParser::ParseDirectiveLEB128(bool Signed) {
return false;
}
-/// ParseDirectiveSymbolAttribute
+/// parseDirectiveSymbolAttribute
/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
-bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
+bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
StringRef Name;
@@ -3375,7 +3406,8 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
if (Sym->isTemporary())
return Error(Loc, "non-local symbol required in directive");
- getStreamer().EmitSymbolAttribute(Sym, Attr);
+ if (!getStreamer().EmitSymbolAttribute(Sym, Attr))
+ return Error(Loc, "unable to emit symbol attribute");
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -3390,9 +3422,9 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
return false;
}
-/// ParseDirectiveComm
+/// parseDirectiveComm
/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
-bool AsmParser::ParseDirectiveComm(bool IsLocal) {
+bool AsmParser::parseDirectiveComm(bool IsLocal) {
checkForValidSection();
SMLoc IDLoc = getLexer().getLoc();
@@ -3442,14 +3474,14 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
// but a size of .lcomm creates a bss symbol of size zero.
if (Size < 0)
return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
- "be less than zero");
+ "be less than zero");
// NOTE: The alignment in the directive is a power of 2 value, the assembler
// may internally end up wanting an alignment in bytes.
// FIXME: Diagnose overflow.
if (Pow2Alignment < 0)
return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
- "alignment, can't be less than zero");
+ "alignment, can't be less than zero");
if (!Sym->isUndefined())
return Error(IDLoc, "invalid symbol redefinition");
@@ -3464,9 +3496,9 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
return false;
}
-/// ParseDirectiveAbort
+/// parseDirectiveAbort
/// ::= .abort [... message ...]
-bool AsmParser::ParseDirectiveAbort() {
+bool AsmParser::parseDirectiveAbort() {
// FIXME: Use loc from directive.
SMLoc Loc = getLexer().getLoc();
@@ -3485,25 +3517,25 @@ bool AsmParser::ParseDirectiveAbort() {
return false;
}
-/// ParseDirectiveInclude
+/// parseDirectiveInclude
/// ::= .include "filename"
-bool AsmParser::ParseDirectiveInclude() {
+bool AsmParser::parseDirectiveInclude() {
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '.include' directive");
- std::string Filename = getTok().getString();
+ // Allow the strings to have escaped octal character sequence.
+ std::string Filename;
+ if (parseEscapedString(Filename))
+ return true;
SMLoc IncludeLoc = getLexer().getLoc();
Lex();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.include' directive");
- // Strip the quotes.
- Filename = Filename.substr(1, Filename.size()-2);
-
// Attempt to switch the lexer to the included file before consuming the end
// of statement to avoid losing it when we switch.
- if (EnterIncludeFile(Filename)) {
+ if (enterIncludeFile(Filename)) {
Error(IncludeLoc, "Could not find include file '" + Filename + "'");
return true;
}
@@ -3511,24 +3543,24 @@ bool AsmParser::ParseDirectiveInclude() {
return false;
}
-/// ParseDirectiveIncbin
+/// parseDirectiveIncbin
/// ::= .incbin "filename"
-bool AsmParser::ParseDirectiveIncbin() {
+bool AsmParser::parseDirectiveIncbin() {
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '.incbin' directive");
- std::string Filename = getTok().getString();
+ // Allow the strings to have escaped octal character sequence.
+ std::string Filename;
+ if (parseEscapedString(Filename))
+ return true;
SMLoc IncbinLoc = getLexer().getLoc();
Lex();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.incbin' directive");
- // Strip the quotes.
- Filename = Filename.substr(1, Filename.size()-2);
-
// Attempt to process the included file.
- if (ProcessIncbinFile(Filename)) {
+ if (processIncbinFile(Filename)) {
Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
return true;
}
@@ -3536,9 +3568,9 @@ bool AsmParser::ParseDirectiveIncbin() {
return false;
}
-/// ParseDirectiveIf
+/// parseDirectiveIf
/// ::= .if expression
-bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
if (TheCondState.Ignore) {
@@ -3560,9 +3592,9 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
return false;
}
-/// ParseDirectiveIfb
+/// parseDirectiveIfb
/// ::= .ifb string
-bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
+bool AsmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
@@ -3583,16 +3615,16 @@ bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
return false;
}
-/// ParseDirectiveIfc
+/// parseDirectiveIfc
/// ::= .ifc string1, string2
-bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
+bool AsmParser::parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
if (TheCondState.Ignore) {
eatToEndOfStatement();
} else {
- StringRef Str1 = ParseStringToComma();
+ StringRef Str1 = parseStringToComma();
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.ifc' directive");
@@ -3613,9 +3645,9 @@ bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
return false;
}
-/// ParseDirectiveIfdef
+/// parseDirectiveIfdef
/// ::= .ifdef symbol
-bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
+bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
StringRef Name;
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
@@ -3640,9 +3672,9 @@ bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
return false;
}
-/// ParseDirectiveElseIf
+/// parseDirectiveElseIf
/// ::= .elseif expression
-bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveElseIf(SMLoc DirectiveLoc) {
if (TheCondState.TheCond != AsmCond::IfCond &&
TheCondState.TheCond != AsmCond::ElseIfCond)
Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
@@ -3671,9 +3703,9 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
return false;
}
-/// ParseDirectiveElse
+/// parseDirectiveElse
/// ::= .else
-bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.else' directive");
@@ -3695,16 +3727,15 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
return false;
}
-/// ParseDirectiveEndIf
+/// parseDirectiveEndIf
/// ::= .endif
-bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.endif' directive");
Lex();
- if ((TheCondState.TheCond == AsmCond::NoCond) ||
- TheCondStack.empty())
+ if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
".else");
if (!TheCondStack.empty()) {
@@ -3748,7 +3779,6 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".extern"] = DK_EXTERN;
DirectiveKindMap[".globl"] = DK_GLOBL;
DirectiveKindMap[".global"] = DK_GLOBAL;
- DirectiveKindMap[".indirect_symbol"] = DK_INDIRECT_SYMBOL;
DirectiveKindMap[".lazy_reference"] = DK_LAZY_REFERENCE;
DirectiveKindMap[".no_dead_strip"] = DK_NO_DEAD_STRIP;
DirectiveKindMap[".symbol_resolver"] = DK_SYMBOL_RESOLVER;
@@ -3810,6 +3840,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
+ DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
DirectiveKindMap[".macro"] = DK_MACRO;
@@ -3818,8 +3849,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".purgem"] = DK_PURGEM;
}
-
-MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
+MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
AsmToken EndToken, StartToken = getTok();
unsigned NestLevel = 0;
@@ -3836,8 +3866,7 @@ MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
}
// Otherwise, check whether we have reached the .endr.
- if (Lexer.is(AsmToken::Identifier) &&
- getTok().getIdentifier() == ".endr") {
+ if (Lexer.is(AsmToken::Identifier) && getTok().getIdentifier() == ".endr") {
if (NestLevel == 0) {
EndToken = getTok();
Lex();
@@ -3865,19 +3894,17 @@ MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
return &MacroLikeBodies.back();
}
-void AsmParser::InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
+void AsmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
raw_svector_ostream &OS) {
OS << ".endr\n";
MemoryBuffer *Instantiation =
- MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
+ MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
// Create the macro instantiation object and add to the current macro
// instantiation stack.
- MacroInstantiation *MI = new MacroInstantiation(M, DirectiveLoc,
- CurBuffer,
- getTok().getLoc(),
- Instantiation);
+ MacroInstantiation *MI = new MacroInstantiation(
+ M, DirectiveLoc, CurBuffer, getTok().getLoc(), Instantiation);
ActiveMacros.push_back(MI);
// Jump to the macro instantiation and prime the lexer.
@@ -3886,7 +3913,7 @@ void AsmParser::InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
Lex();
}
-bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc) {
int64_t Count;
if (parseAbsoluteExpression(Count))
return TokError("unexpected token in '.rept' directive");
@@ -3901,7 +3928,7 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
Lex();
// Lex the rept definition.
- MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
+ MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
@@ -3915,14 +3942,14 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc()))
return true;
}
- InstantiateMacroLikeBody(M, DirectiveLoc, OS);
+ instantiateMacroLikeBody(M, DirectiveLoc, OS);
return false;
}
-/// ParseDirectiveIrp
+/// parseDirectiveIrp
/// ::= .irp symbol,values
-bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) {
MCAsmMacroParameters Parameters;
MCAsmMacroParameter Parameter;
@@ -3937,14 +3964,14 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
Lex();
MCAsmMacroArguments A;
- if (ParseMacroArguments(0, A))
+ if (parseMacroArguments(0, A))
return true;
// Eat the end of statement.
Lex();
// Lex the irp definition.
- MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
+ MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
@@ -3961,14 +3988,14 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
return true;
}
- InstantiateMacroLikeBody(M, DirectiveLoc, OS);
+ instantiateMacroLikeBody(M, DirectiveLoc, OS);
return false;
}
-/// ParseDirectiveIrpc
+/// parseDirectiveIrpc
/// ::= .irpc symbol,values
-bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
MCAsmMacroParameters Parameters;
MCAsmMacroParameter Parameter;
@@ -3983,7 +4010,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
Lex();
MCAsmMacroArguments A;
- if (ParseMacroArguments(0, A))
+ if (parseMacroArguments(0, A))
return true;
if (A.size() != 1 || A.front().size() != 1)
@@ -3993,7 +4020,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
Lex();
// Lex the irpc definition.
- MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
+ MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
@@ -4006,7 +4033,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
std::size_t I, End = Values.size();
for (I = 0; I < End; ++I) {
MCAsmMacroArgument Arg;
- Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1)));
+ Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I + 1)));
MCAsmMacroArguments Args;
Args.push_back(Arg);
@@ -4015,24 +4042,24 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
return true;
}
- InstantiateMacroLikeBody(M, DirectiveLoc, OS);
+ instantiateMacroLikeBody(M, DirectiveLoc, OS);
return false;
}
-bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveEndr(SMLoc DirectiveLoc) {
if (ActiveMacros.empty())
return TokError("unmatched '.endr' directive");
// The only .repl that should get here are the ones created by
- // InstantiateMacroLikeBody.
+ // instantiateMacroLikeBody.
assert(getLexer().is(AsmToken::EndOfStatement));
- HandleMacroExit();
+ handleMacroExit();
return false;
}
-bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
+bool AsmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
size_t Len) {
const MCExpr *Value;
SMLoc ExprLoc = getLexer().getLoc();
@@ -4049,7 +4076,7 @@ bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
return false;
}
-bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
+bool AsmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
const MCExpr *Value;
SMLoc ExprLoc = getLexer().getLoc();
if (parseExpression(Value))
@@ -4061,16 +4088,15 @@ bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
if (!isPowerOf2_64(IntValue))
return Error(ExprLoc, "literal value not a power of two greater than zero");
- Info.AsmRewrites->push_back(AsmRewrite(AOK_Align, IDLoc, 5,
- Log2_64(IntValue)));
+ Info.AsmRewrites->push_back(
+ AsmRewrite(AOK_Align, IDLoc, 5, Log2_64(IntValue)));
return false;
}
// We are comparing pointers, but the pointers are relative to a single string.
// Thus, this should always be deterministic.
-static int RewritesSort(const void *A, const void *B) {
- const AsmRewrite *AsmRewriteA = static_cast<const AsmRewrite *>(A);
- const AsmRewrite *AsmRewriteB = static_cast<const AsmRewrite *>(B);
+static int rewritesSort(const AsmRewrite *AsmRewriteA,
+ const AsmRewrite *AsmRewriteB) {
if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
return -1;
if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
@@ -4080,25 +4106,22 @@ static int RewritesSort(const void *A, const void *B) {
// rewrite to the same location. Make sure the SizeDirective rewrite is
// performed first, then the Imm/ImmPrefix and finally the Input/Output. This
// ensures the sort algorithm is stable.
- if (AsmRewritePrecedence [AsmRewriteA->Kind] >
- AsmRewritePrecedence [AsmRewriteB->Kind])
+ if (AsmRewritePrecedence[AsmRewriteA->Kind] >
+ AsmRewritePrecedence[AsmRewriteB->Kind])
return -1;
- if (AsmRewritePrecedence [AsmRewriteA->Kind] <
- AsmRewritePrecedence [AsmRewriteB->Kind])
+ if (AsmRewritePrecedence[AsmRewriteA->Kind] <
+ AsmRewritePrecedence[AsmRewriteB->Kind])
return 1;
- llvm_unreachable ("Unstable rewrite sort.");
+ llvm_unreachable("Unstable rewrite sort.");
}
-bool
-AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
- unsigned &NumOutputs, unsigned &NumInputs,
- SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
- SmallVectorImpl<std::string> &Constraints,
- SmallVectorImpl<std::string> &Clobbers,
- const MCInstrInfo *MII,
- const MCInstPrinter *IP,
- MCAsmParserSemaCallback &SI) {
+bool AsmParser::parseMSInlineAsm(
+ void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,
+ unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
+ const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
SmallVector<void *, 4> InputDecls;
SmallVector<void *, 4> OutputDecls;
SmallVector<bool, 4> InputDeclsAddressOf;
@@ -4117,7 +4140,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned OutputIdx = 0;
while (getLexer().isNot(AsmToken::Eof)) {
ParseStatementInfo Info(&AsmStrRewrites);
- if (ParseStatement(Info))
+ if (parseStatement(Info))
return true;
if (Info.ParseError)
@@ -4205,7 +4228,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
raw_string_ostream OS(AsmStringIR);
const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart();
const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
- array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort);
+ array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
E = AsmStrRewrites.end();
I != E; ++I) {
@@ -4230,7 +4253,8 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned AdditionalSkip = 0;
// Rewrite expressions in $N notation.
switch (Kind) {
- default: break;
+ default:
+ break;
case AOK_Imm:
OS << "$$" << (*I).Val;
break;
@@ -4285,8 +4309,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
}
/// \brief Create an MCAsmParser instance.
-MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM,
- MCContext &C, MCStreamer &Out,
- const MCAsmInfo &MAI) {
+MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C,
+ MCStreamer &Out, const MCAsmInfo &MAI) {
return new AsmParser(SM, C, Out, MAI);
}
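
The rewritesSort change above keeps the comparator's two ordering rules: compare by source location first (every rewrite points into the same string, so the pointer comparison is deterministic), then break ties with a precedence table so rewrites at the same location sort stably. A minimal standalone sketch of that comparator pattern, using stand-in Rewrite/Kind types rather than LLVM's AsmRewrite:

```cpp
// Sketch only: stand-in types, not LLVM's AsmRewrite/AsmRewritePrecedence.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

enum Kind { SizeDirective, ImmPrefix, Input };   // hypothetical rewrite kinds
static const int Precedence[] = {2, 1, 0};       // higher value sorts first

struct Rewrite { const char *Loc; Kind K; };

static int rewriteCompare(const Rewrite *A, const Rewrite *B) {
  if (A->Loc < B->Loc) return -1;   // pointers into one buffer: well defined
  if (B->Loc < A->Loc) return 1;
  if (Precedence[A->K] > Precedence[B->K]) return -1;
  if (Precedence[A->K] < Precedence[B->K]) return 1;
  assert(false && "Unstable rewrite sort.");
  return 0;
}

int main() {
  const char *Buf = "mov eax, dword ptr [x]";
  std::vector<Rewrite> Rs = {{Buf + 9, Input}, {Buf + 9, SizeDirective}};
  std::sort(Rs.begin(), Rs.end(), [](const Rewrite &A, const Rewrite &B) {
    return rewriteCompare(&A, &B) < 0;
  });
  std::printf("first kind after sort: %d\n", Rs[0].K); // SizeDirective (0)
  return 0;
}
```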
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index df1794c..d8343a3 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -35,6 +35,10 @@ class COFFAsmParser : public MCAsmParserExtension {
unsigned Characteristics,
SectionKind Kind);
+ bool ParseSectionSwitch(StringRef Section, unsigned Characteristics,
+ SectionKind Kind, StringRef COMDATSymName,
+ COFF::COMDATType Type, const MCSectionCOFF *Assoc);
+
bool ParseSectionName(StringRef &SectionName);
bool ParseSectionFlags(StringRef FlagsString, unsigned* Flags);
@@ -111,6 +115,8 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseDirectiveType(StringRef, SMLoc);
bool ParseDirectiveEndef(StringRef, SMLoc);
bool ParseDirectiveSecRel32(StringRef, SMLoc);
+ bool parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
+ const MCSectionCOFF *&Assoc);
bool ParseDirectiveLinkOnce(StringRef, SMLoc);
// Win64 EH directives.
@@ -284,12 +290,22 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
unsigned Characteristics,
SectionKind Kind) {
+ return ParseSectionSwitch(Section, Characteristics, Kind, "",
+ COFF::IMAGE_COMDAT_SELECT_ANY, 0);
+}
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind,
+ StringRef COMDATSymName,
+ COFF::COMDATType Type,
+ const MCSectionCOFF *Assoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in section switching directive");
Lex();
getStreamer().SwitchSection(getContext().getCOFFSection(
- Section, Characteristics, Kind));
+ Section, Characteristics, Kind, COMDATSymName, Type, Assoc));
return false;
}
@@ -303,7 +319,7 @@ bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
return false;
}
-// .section name [, "flags"]
+// .section name [, "flags"] [, identifier [ identifier ], identifier]
//
// Supported flags:
// a: Ignored.
@@ -340,11 +356,30 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
return true;
}
+ COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
+ const MCSectionCOFF *Assoc = 0;
+ StringRef COMDATSymName;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ Flags |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ if (parseCOMDATTypeAndAssoc(Type, Assoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected comma in directive");
+ Lex();
+
+ if (getParser().parseIdentifier(COMDATSymName))
+ return TokError("expected identifier in directive");
+ }
+
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
SectionKind Kind = computeSectionKind(Flags);
- ParseSectionSwitch(SectionName, Flags, Kind);
+ ParseSectionSwitch(SectionName, Flags, Kind, COMDATSymName, Type, Assoc);
return false;
}
@@ -409,37 +444,29 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
return false;
}
-/// ParseDirectiveLinkOnce
-/// ::= .linkonce [ identifier [ identifier ] ]
-bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
- COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
-
- if (getLexer().is(AsmToken::Identifier)) {
- StringRef TypeId = getTok().getIdentifier();
+/// ::= [ identifier [ identifier ] ]
+bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
+ const MCSectionCOFF *&Assoc) {
+ StringRef TypeId = getTok().getIdentifier();
- Type = StringSwitch<COFF::COMDATType>(TypeId)
- .Case("one_only", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES)
- .Case("discard", COFF::IMAGE_COMDAT_SELECT_ANY)
- .Case("same_size", COFF::IMAGE_COMDAT_SELECT_SAME_SIZE)
- .Case("same_contents", COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH)
- .Case("associative", COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
- .Case("largest", COFF::IMAGE_COMDAT_SELECT_LARGEST)
- .Case("newest", COFF::IMAGE_COMDAT_SELECT_NEWEST)
- .Default((COFF::COMDATType)0);
+ Type = StringSwitch<COFF::COMDATType>(TypeId)
+ .Case("one_only", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES)
+ .Case("discard", COFF::IMAGE_COMDAT_SELECT_ANY)
+ .Case("same_size", COFF::IMAGE_COMDAT_SELECT_SAME_SIZE)
+ .Case("same_contents", COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH)
+ .Case("associative", COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+ .Case("largest", COFF::IMAGE_COMDAT_SELECT_LARGEST)
+ .Case("newest", COFF::IMAGE_COMDAT_SELECT_NEWEST)
+ .Default((COFF::COMDATType)0);
- if (Type == 0)
- return TokError(Twine("unrecognized COMDAT type '" + TypeId + "'"));
+ if (Type == 0)
+ return TokError(Twine("unrecognized COMDAT type '" + TypeId + "'"));
- Lex();
- }
-
- const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>(
- getStreamer().getCurrentSection().first);
+ Lex();
- const MCSectionCOFF *Assoc = 0;
if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
- StringRef AssocName;
SMLoc Loc = getTok().getLoc();
+ StringRef AssocName;
if (ParseSectionName(AssocName))
return TokError("expected associated section name");
@@ -447,14 +474,33 @@ bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
getContext().getCOFFSection(AssocName));
if (!Assoc)
return Error(Loc, "cannot associate unknown section '" + AssocName + "'");
- if (Assoc == Current)
- return Error(Loc, "cannot associate a section with itself");
if (!(Assoc->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT))
return Error(Loc, "associated section must be a COMDAT section");
if (Assoc->getSelection() == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
return Error(Loc, "associated section cannot be itself associative");
}
+ return false;
+}
+
+/// ParseDirectiveLinkOnce
+/// ::= .linkonce [ identifier [ identifier ] ]
+bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
+ COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
+ const MCSectionCOFF *Assoc = 0;
+ if (getLexer().is(AsmToken::Identifier))
+ if (parseCOMDATTypeAndAssoc(Type, Assoc))
+ return true;
+
+ const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>(
+ getStreamer().getCurrentSection().first);
+
+
+ if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+ if (Assoc == Current)
+ return Error(Loc, "cannot associate a section with itself");
+ }
+
if (Current->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT)
return Error(Loc, Twine("section '") + Current->getSectionName() +
"' is already linkonce");
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 0aeeaf6..4c9bafa 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -45,6 +45,8 @@ public:
this->MCAsmParserExtension::Initialize(Parser);
addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveIndirectSymbol>(
+ ".indirect_symbol");
addDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
".subsections_via_symbols");
@@ -69,6 +71,7 @@ public:
".end_data_region");
// Special section directives.
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveBss>(".bss");
addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(
".const_data");
@@ -163,6 +166,7 @@ public:
}
bool ParseDirectiveDesc(StringRef, SMLoc);
+ bool ParseDirectiveIndirectSymbol(StringRef, SMLoc);
bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
bool ParseDirectiveLsym(StringRef, SMLoc);
bool ParseDirectiveLinkerOption(StringRef, SMLoc);
@@ -179,6 +183,10 @@ public:
bool ParseDirectiveDataRegionEnd(StringRef, SMLoc);
// Named Section Directive
+ bool ParseSectionDirectiveBss(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__bss");
+ }
+
bool ParseSectionDirectiveConst(StringRef, SMLoc) {
return ParseSectionSwitch("__TEXT", "__const");
}
@@ -415,6 +423,39 @@ bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) {
return false;
}
+/// ParseDirectiveIndirectSymbol
+/// ::= .indirect_symbol identifier
+bool DarwinAsmParser::ParseDirectiveIndirectSymbol(StringRef, SMLoc Loc) {
+ const MCSectionMachO *Current = static_cast<const MCSectionMachO*>(
+ getStreamer().getCurrentSection().first);
+ unsigned SectionType = Current->getType();
+ if (SectionType != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS &&
+ SectionType != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+ SectionType != MCSectionMachO::S_SYMBOL_STUBS)
+ return Error(Loc, "indirect symbol not in a symbol pointer or stub "
+ "section");
+
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in .indirect_symbol directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ // Assembler local symbols don't make any sense here. Complain loudly.
+ if (Sym->isTemporary())
+ return TokError("non-local symbol required in directive");
+
+ if (!getStreamer().EmitSymbolAttribute(Sym, MCSA_IndirectSymbol))
+ return TokError("unable to emit indirect symbol attribute for: " + Name);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.indirect_symbol' directive");
+
+ Lex();
+
+ return false;
+}
+
/// ParseDirectiveDumpOrLoad
/// ::= ( .dump | .load ) "filename"
bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive,
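
The new `.indirect_symbol` handler only accepts the directive inside symbol-pointer or stub sections, and the same rule is enforced again by MachObjectWriter further down. A tiny sketch of that membership check, with stand-in constants for the Mach-O section types:

```cpp
#include <cstdio>

enum SectionType {
  Regular,
  NonLazySymbolPointers,
  LazySymbolPointers,
  SymbolStubs
}; // illustrative stand-ins for MCSectionMachO::S_* values

static bool allowsIndirectSymbols(SectionType T) {
  return T == NonLazySymbolPointers || T == LazySymbolPointers ||
         T == SymbolStubs;
}

int main() {
  std::printf("%d %d\n", allowsIndirectSymbols(SymbolStubs),
              allowsIndirectSymbols(Regular)); // 1 0
  return 0;
}
```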
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 3134fc3..8807975 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -30,14 +31,11 @@ class ELFAsmParser : public MCAsmParserExtension {
getParser().addDirectiveHandler(Directive, Handler);
}
- bool ParseSectionSwitch(StringRef Section, unsigned Type,
- unsigned Flags, SectionKind Kind);
- bool SeenIdent;
+ bool ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind);
public:
- ELFAsmParser() : SeenIdent(false) {
- BracketExpressionsSupported = true;
- }
+ ELFAsmParser() { BracketExpressionsSupported = true; }
virtual void Initialize(MCAsmParser &Parser) {
// Call the base implementation.
@@ -241,7 +239,6 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
}
for (;;) {
- StringRef Tmp;
unsigned CurSize;
SMLoc PrevLoc = getLexer().getLoc();
@@ -279,14 +276,17 @@ static SectionKind computeSectionKind(unsigned Flags) {
return SectionKind::getDataRel();
}
-static int parseSectionFlags(StringRef flagsStr) {
- int flags = 0;
+static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
+ unsigned flags = 0;
for (unsigned i = 0; i < flagsStr.size(); i++) {
switch (flagsStr[i]) {
case 'a':
flags |= ELF::SHF_ALLOC;
break;
+ case 'e':
+ flags |= ELF::SHF_EXCLUDE;
+ break;
case 'x':
flags |= ELF::SHF_EXECINSTR;
break;
@@ -311,8 +311,11 @@ static int parseSectionFlags(StringRef flagsStr) {
case 'G':
flags |= ELF::SHF_GROUP;
break;
+ case '?':
+ *UseLastGroup = true;
+ break;
default:
- return -1;
+ return -1U;
}
}
@@ -352,6 +355,7 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) {
StringRef GroupName;
unsigned Flags = 0;
const MCExpr *Subsection = 0;
+ bool UseLastGroup = false;
// Set the defaults first.
if (SectionName == ".fini" || SectionName == ".init" ||
@@ -377,13 +381,16 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) {
StringRef FlagsStr = getTok().getStringContents();
Lex();
- int extraFlags = parseSectionFlags(FlagsStr);
- if (extraFlags < 0)
+ unsigned extraFlags = parseSectionFlags(FlagsStr, &UseLastGroup);
+ if (extraFlags == -1U)
return TokError("unknown flag");
Flags |= extraFlags;
bool Mergeable = Flags & ELF::SHF_MERGE;
bool Group = Flags & ELF::SHF_GROUP;
+ if (Group && UseLastGroup)
+ return TokError("Section cannot specifiy a group name while also acting "
+ "as a member of the last group");
if (getLexer().isNot(AsmToken::Comma)) {
if (Mergeable)
@@ -392,10 +399,13 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) {
return TokError("Group section must specify the type");
} else {
Lex();
- if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
- return TokError("expected '@' or '%' before type");
+ if (getLexer().is(AsmToken::At) || getLexer().is(AsmToken::Percent) ||
+ getLexer().is(AsmToken::String)) {
+ if (!getLexer().is(AsmToken::String))
+ Lex();
+ } else
+ return TokError("expected '@<type>', '%<type>' or \"<type>\"");
- Lex();
if (getParser().parseIdentifier(TypeName))
return TokError("expected identifier in directive");
@@ -461,6 +471,16 @@ EndStmt:
return TokError("unknown section type");
}
+ if (UseLastGroup) {
+ MCSectionSubPair CurrentSection = getStreamer().getCurrentSection();
+ if (const MCSectionELF *Section =
+ cast_or_null<MCSectionELF>(CurrentSection.first))
+ if (const MCSymbol *Group = Section->getGroup()) {
+ GroupName = Group->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+ }
+
SectionKind Kind = computeSectionKind(Flags);
getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
Flags, Kind, Size,
@@ -479,7 +499,11 @@ bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
}
/// ParseDirectiveELFType
+/// ::= .type identifier , STT_<TYPE_IN_UPPER_CASE>
+/// ::= .type identifier , #attribute
/// ::= .type identifier , @attribute
+/// ::= .type identifier , %attribute
+/// ::= .type identifier , "attribute"
bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
StringRef Name;
if (getParser().parseIdentifier(Name))
@@ -492,26 +516,42 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
return TokError("unexpected token in '.type' directive");
Lex();
- if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
- return TokError("expected '@' or '%' before type");
- Lex();
-
StringRef Type;
SMLoc TypeLoc;
+ MCSymbolAttr Attr;
+ if (getLexer().is(AsmToken::Identifier)) {
+ TypeLoc = getLexer().getLoc();
+ if (getParser().parseIdentifier(Type))
+ return TokError("expected symbol type in directive");
+ Attr = StringSwitch<MCSymbolAttr>(Type)
+ .Case("STT_FUNC", MCSA_ELF_TypeFunction)
+ .Case("STT_OBJECT", MCSA_ELF_TypeObject)
+ .Case("STT_TLS", MCSA_ELF_TypeTLS)
+ .Case("STT_COMMON", MCSA_ELF_TypeCommon)
+ .Case("STT_NOTYPE", MCSA_ELF_TypeNoType)
+ .Case("STT_GNU_IFUNC", MCSA_ELF_TypeIndFunction)
+ .Default(MCSA_Invalid);
+ } else if (getLexer().is(AsmToken::Hash) || getLexer().is(AsmToken::At) ||
+ getLexer().is(AsmToken::Percent) ||
+ getLexer().is(AsmToken::String)) {
+ if (!getLexer().is(AsmToken::String))
+ Lex();
- TypeLoc = getLexer().getLoc();
- if (getParser().parseIdentifier(Type))
- return TokError("expected symbol type in directive");
-
- MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
- .Case("function", MCSA_ELF_TypeFunction)
- .Case("object", MCSA_ELF_TypeObject)
- .Case("tls_object", MCSA_ELF_TypeTLS)
- .Case("common", MCSA_ELF_TypeCommon)
- .Case("notype", MCSA_ELF_TypeNoType)
- .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
- .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction)
- .Default(MCSA_Invalid);
+ TypeLoc = getLexer().getLoc();
+ if (getParser().parseIdentifier(Type))
+ return TokError("expected symbol type in directive");
+ Attr = StringSwitch<MCSymbolAttr>(Type)
+ .Case("function", MCSA_ELF_TypeFunction)
+ .Case("object", MCSA_ELF_TypeObject)
+ .Case("tls_object", MCSA_ELF_TypeTLS)
+ .Case("common", MCSA_ELF_TypeCommon)
+ .Case("notype", MCSA_ELF_TypeNoType)
+ .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction)
+ .Default(MCSA_Invalid);
+ } else
+ return TokError("expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '@<type>', "
+ "'%<type>' or \"<type>\"");
if (Attr == MCSA_Invalid)
return Error(TypeLoc, "unsupported attribute in '.type' directive");
@@ -536,22 +576,7 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
Lex();
- const MCSection *Comment =
- getContext().getELFSection(".comment", ELF::SHT_PROGBITS,
- ELF::SHF_MERGE |
- ELF::SHF_STRINGS,
- SectionKind::getReadOnly(),
- 1, "");
-
- getStreamer().PushSection();
- getStreamer().SwitchSection(Comment);
- if (!SeenIdent) {
- getStreamer().EmitIntValue(0, 1);
- SeenIdent = true;
- }
- getStreamer().EmitBytes(Data);
- getStreamer().EmitIntValue(0, 1);
- getStreamer().PopSection();
+ getStreamer().EmitIdent(Data);
return false;
}
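
parseSectionFlags now returns an unsigned value with -1U as the error sentinel, and it learns two new flag letters: 'e' for SHF_EXCLUDE and '?' for joining the most recent section group. A standalone sketch of the same parsing shape; the bit values are made up for the example, not the real ELF::SHF_* constants:

```cpp
#include <cstdio>
#include <string>

enum : unsigned {
  FlagAlloc = 1u, FlagExec = 2u, FlagWrite = 4u,
  FlagExclude = 8u, FlagGroup = 16u
};

static unsigned parseFlags(const std::string &Str, bool *UseLastGroup) {
  unsigned Flags = 0;
  for (char C : Str) {
    switch (C) {
    case 'a': Flags |= FlagAlloc;   break;
    case 'x': Flags |= FlagExec;    break;
    case 'w': Flags |= FlagWrite;   break;
    case 'e': Flags |= FlagExclude; break;
    case 'G': Flags |= FlagGroup;   break;
    case '?': *UseLastGroup = true; break;
    default:  return -1U;           // unknown flag letter
    }
  }
  return Flags;
}

int main() {
  bool UseLastGroup = false;
  unsigned F = parseFlags("ax?", &UseLastGroup);
  if (F == -1U) { std::puts("unknown flag"); return 1; }
  std::printf("flags=%u useLastGroup=%d\n", F, UseLastGroup); // flags=3, 1
  return 0;
}
```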
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
index c5c3bb7..f7bf002 100644
--- a/lib/MC/MCPureStreamer.cpp
+++ b/lib/MC/MCPureStreamer.cpp
@@ -29,7 +29,7 @@ private:
public:
MCPureStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter)
- : MCObjectStreamer(SK_PureStreamer, Context, TAB, OS, Emitter) {}
+ : MCObjectStreamer(Context, 0, TAB, OS, Emitter) {}
/// @name MCStreamer Interface
/// @{
@@ -51,8 +51,9 @@ public:
virtual void FinishImpl();
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
report_fatal_error("unsupported directive in pure streamer");
+ return false;
}
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
report_fatal_error("unsupported directive in pure streamer");
@@ -93,16 +94,13 @@ public:
virtual void EmitFileDirective(StringRef Filename) {
report_fatal_error("unsupported directive in pure streamer");
}
+ virtual void EmitIdent(StringRef IdentString) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
StringRef Filename, unsigned CUID = 0) {
report_fatal_error("unsupported directive in pure streamer");
}
-
- /// @}
-
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_PureStreamer;
- }
};
} // end anonymous namespace.
@@ -120,7 +118,7 @@ void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(getCurrentSection().first && "Cannot emit before setting section!");
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index bf1a984..09eb3e7 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -32,6 +32,29 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
return false;
}
+static void printName(raw_ostream &OS, StringRef Name) {
+ if (Name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
+ OS << Name;
+ return;
+ }
+ OS << '"';
+ for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
+ if (*B == '"') // Unquoted "
+ OS << "\\\"";
+ else if (*B != '\\') // Neither " or backslash
+ OS << *B;
+ else if (B + 1 == E) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << B[0] << B[1]; // Quoted character
+ ++B;
+ }
+ }
+ OS << '"';
+}
+
void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
raw_ostream &OS,
const MCExpr *Subsection) const {
@@ -44,27 +67,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
return;
}
- StringRef name = getSectionName();
- if (name.find_first_not_of("0123456789_."
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) {
- OS << "\t.section\t" << name;
- } else {
- OS << "\t.section\t\"";
- for (const char *b = name.begin(), *e = name.end(); b < e; ++b) {
- if (*b == '"') // Unquoted "
- OS << "\\\"";
- else if (*b != '\\') // Neither " or backslash
- OS << *b;
- else if (b + 1 == e) // Trailing backslash
- OS << "\\\\";
- else {
- OS << b[0] << b[1]; // Quoted character
- ++b;
- }
- }
- OS << '"';
- }
+ OS << "\t.section\t";
+ printName(OS, getSectionName());
// Handle the weird solaris syntax if desired.
if (MAI.usesSunStyleELFSectionSwitchSyntax() &&
@@ -75,6 +79,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << ",#execinstr";
if (Flags & ELF::SHF_WRITE)
OS << ",#write";
+ if (Flags & ELF::SHF_EXCLUDE)
+ OS << ",#exclude";
if (Flags & ELF::SHF_TLS)
OS << ",#tls";
OS << '\n';
@@ -84,6 +90,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << ",\"";
if (Flags & ELF::SHF_ALLOC)
OS << 'a';
+ if (Flags & ELF::SHF_EXCLUDE)
+ OS << 'e';
if (Flags & ELF::SHF_EXECINSTR)
OS << 'x';
if (Flags & ELF::SHF_GROUP)
@@ -131,8 +139,11 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << "," << EntrySize;
}
- if (Flags & ELF::SHF_GROUP)
- OS << "," << Group->getName() << ",comdat";
+ if (Flags & ELF::SHF_GROUP) {
+ OS << ",";
+ printName(OS, Group->getName());
+ OS << ",comdat";
+ }
OS << '\n';
if (Subsection)
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 6542f42..2e1d69b 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -21,10 +22,17 @@
#include <cstdlib>
using namespace llvm;
-MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
- : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
- CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
+// Pin the vtables to this file.
+MCTargetStreamer::~MCTargetStreamer() {}
+void ARMTargetStreamer::anchor() {}
+
+MCStreamer::MCStreamer(MCContext &Ctx, MCTargetStreamer *TargetStreamer)
+ : Context(Ctx), TargetStreamer(TargetStreamer), EmitEHFrame(true),
+ EmitDebugFrame(false), CurrentW64UnwindInfo(0), LastSymbol(0),
+ AutoInitSections(false) {
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
+ if (TargetStreamer)
+ TargetStreamer->setStreamer(this);
}
MCStreamer::~MCStreamer() {
@@ -72,6 +80,13 @@ raw_ostream &MCStreamer::GetCommentOS() {
return nulls();
}
+void MCStreamer::generateCompactUnwindEncodings(MCAsmBackend *MAB) {
+ for (std::vector<MCDwarfFrameInfo>::iterator I = FrameInfos.begin(),
+ E = FrameInfos.end(); I != E; ++I)
+ I->CompactUnwindEncoding =
+ (MAB ? MAB->generateCompactUnwindEncoding(I->Instructions) : 0);
+}
+
void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta,
const MCSymbol *Label, int PointerSize) {
// emit the sequence to set the address
@@ -183,17 +198,28 @@ void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol) {
}
+void MCStreamer::AssignSection(MCSymbol *Symbol, const MCSection *Section) {
+ if (Section)
+ Symbol->setSection(*Section);
+ else
+ Symbol->setUndefined();
+
+ // As we emit symbols into a section, track the order so that they can
+ // be sorted upon later. Zero is reserved to mean 'unemitted'.
+ SymbolOrdering[Symbol] = 1 + SymbolOrdering.size();
+}
+
void MCStreamer::EmitLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(getCurrentSection().first && "Cannot emit before setting section!");
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
LastSymbol = Symbol;
}
void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(getCurrentSection().first && "Cannot emit before setting section!");
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
LastSymbol = Symbol;
}
@@ -380,6 +406,14 @@ void MCStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
CurFrame->Instructions.push_back(Instruction);
}
+void MCStreamer::EmitCFIWindowSave() {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createWindowSave(Label);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
W64UnwindInfos.push_back(Frame);
CurrentW64UnwindInfo = W64UnwindInfos.back();
@@ -470,7 +504,9 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
report_fatal_error("Frame register and offset already specified!");
if (Offset & 0x0F)
report_fatal_error("Misaligned frame pointer offset!");
- MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset);
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset);
+ EmitLabel(Label);
CurFrame->LastFrameInst = CurFrame->Instructions.size();
CurFrame->Instructions.push_back(Inst);
}
@@ -534,54 +570,10 @@ void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
llvm_unreachable("This file format doesn't support this directive");
}
-void MCStreamer::EmitFnStart() {
- errs() << "Not implemented yet\n";
- abort();
-}
-
-void MCStreamer::EmitFnEnd() {
- errs() << "Not implemented yet\n";
- abort();
-}
-
-void MCStreamer::EmitCantUnwind() {
- errs() << "Not implemented yet\n";
- abort();
-}
-
-void MCStreamer::EmitHandlerData() {
- errs() << "Not implemented yet\n";
- abort();
-}
-
-void MCStreamer::EmitPersonality(const MCSymbol *Personality) {
- errs() << "Not implemented yet\n";
- abort();
-}
-
-void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
- errs() << "Not implemented yet\n";
- abort();
-}
-
-void MCStreamer::EmitPad(int64_t Offset) {
- errs() << "Not implemented yet\n";
- abort();
-}
-
-void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) {
- errs() << "Not implemented yet\n";
- abort();
-}
-
-void MCStreamer::EmitTCEntry(const MCSymbol &S) {
- llvm_unreachable("Unsupported method");
-}
-
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
-void MCStreamer::EmitRawText(StringRef String) {
+void MCStreamer::EmitRawTextImpl(StringRef String) {
errs() << "EmitRawText called on an MCStreamer that doesn't support it, "
" something must not be fully mc'ized\n";
abort();
@@ -589,19 +581,18 @@ void MCStreamer::EmitRawText(StringRef String) {
void MCStreamer::EmitRawText(const Twine &T) {
SmallString<128> Str;
- T.toVector(Str);
- EmitRawText(Str.str());
+ EmitRawTextImpl(T.toStringRef(Str));
}
-void MCStreamer::EmitFrames(bool usingCFI) {
+void MCStreamer::EmitFrames(MCAsmBackend *MAB, bool usingCFI) {
if (!getNumFrameInfos())
return;
if (EmitEHFrame)
- MCDwarfFrameEmitter::Emit(*this, usingCFI, true);
+ MCDwarfFrameEmitter::Emit(*this, MAB, usingCFI, true);
if (EmitDebugFrame)
- MCDwarfFrameEmitter::Emit(*this, usingCFI, false);
+ MCDwarfFrameEmitter::Emit(*this, MAB, usingCFI, false);
}
void MCStreamer::EmitW64Tables() {
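
AssignSection both binds the symbol to a section (or marks it undefined) and records emission order in SymbolOrdering, with zero reserved to mean "unemitted". A minimal sketch of that ordering trick with stand-in types; unlike the streamer code, it guards against re-assigning an ordinal explicitly:

```cpp
#include <cstdio>
#include <map>
#include <string>

struct Symbol { std::string Name; }; // stand-in for MCSymbol

static std::map<const Symbol *, unsigned> SymbolOrdering;

static void assignOrder(const Symbol *S) {
  // First assignment wins; 1 + size() yields a strictly increasing, nonzero
  // ordinal per newly seen symbol, so 0 can mean "never emitted".
  if (!SymbolOrdering.count(S))
    SymbolOrdering[S] = 1 + SymbolOrdering.size();
}

int main() {
  Symbol A{"a"}, B{"b"};
  assignOrder(&A);
  assignOrder(&B);
  assignOrder(&A); // no effect; keeps ordinal 1
  std::printf("a=%u b=%u\n", SymbolOrdering[&A], SymbolOrdering[&B]); // 1 2
  return 0;
}
```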
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index f18828d..8d8e290 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -27,6 +27,11 @@ MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
ProcFeatures, NumFeatures);
+ InitCPUSchedModel(CPU);
+}
+
+void
+MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) {
if (!CPU.empty())
CPUSchedModel = getSchedModelForCPU(CPU);
else
@@ -91,10 +96,8 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
#endif
// Find entry
- SubtargetInfoKV KV;
- KV.Key = CPU.data();
const SubtargetInfoKV *Found =
- std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, KV);
+ std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU);
if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) {
errs() << "'" << CPU
<< "' is not a recognized processor for this target"
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index b973c57..2416525 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -68,12 +68,23 @@ void MCSymbol::print(raw_ostream &OS) const {
// The name for this MCSymbol is required to be a valid target name. However,
// some targets support quoting names with funny characters. If the name
// contains a funny character, then print it quoted.
- if (!NameNeedsQuoting(getName())) {
- OS << getName();
+ StringRef Name = getName();
+ if (!NameNeedsQuoting(Name)) {
+ OS << Name;
return;
}
- OS << '"' << getName() << '"';
+ OS << '"';
+ for (unsigned I = 0, E = Name.size(); I != E; ++I) {
+ char C = Name[I];
+ if (C == '\n')
+ OS << "\\n";
+ else if (C == '"')
+ OS << "\\\"";
+ else
+ OS << C;
+ }
+ OS << '"';
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
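
MCSymbol::print now escapes embedded quotes and newlines when a name needs quoting, instead of dropping the raw name between quotes. A small standalone sketch of that escaping:

```cpp
#include <cstdio>
#include <string>

static std::string quoteName(const std::string &Name) {
  std::string Out = "\"";
  for (char C : Name) {
    if (C == '\n')     Out += "\\n";   // escape embedded newline
    else if (C == '"') Out += "\\\"";  // escape embedded quote
    else               Out += C;
  }
  Out += '"';
  return Out;
}

int main() {
  std::printf("%s\n", quoteName("odd\"name").c_str()); // "odd\"name"
  return 0;
}
```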
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index c5b637c..b8b07d3 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -64,7 +64,7 @@ static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs,
static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin,
MCWin64EHInstruction &inst) {
- uint8_t b1, b2;
+ uint8_t b2;
uint16_t w;
b2 = (inst.getOperation() & 0x0F);
switch (inst.getOperation()) {
@@ -93,8 +93,7 @@ static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin,
streamer.EmitIntValue(b2, 1);
break;
case Win64EH::UOP_SetFPReg:
- b1 = inst.getOffset() & 0xF0;
- streamer.EmitIntValue(b1, 1);
+ EmitAbsDifference(streamer, inst.getLabel(), begin);
streamer.EmitIntValue(b2, 1);
break;
case Win64EH::UOP_SaveNonVol:
@@ -129,14 +128,29 @@ static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin,
}
}
+static void EmitSymbolRefWithOfs(MCStreamer &streamer,
+ const MCSymbol *Base,
+ const MCSymbol *Other) {
+ MCContext &Context = streamer.getContext();
+ const MCSymbolRefExpr *BaseRef = MCSymbolRefExpr::Create(Base, Context);
+ const MCSymbolRefExpr *OtherRef = MCSymbolRefExpr::Create(Other, Context);
+ const MCExpr *Ofs = MCBinaryExpr::CreateSub(OtherRef, BaseRef, Context);
+ const MCSymbolRefExpr *BaseRefRel = MCSymbolRefExpr::Create(Base,
+ MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Context);
+ streamer.EmitValue(MCBinaryExpr::CreateAdd(BaseRefRel, Ofs, Context), 4);
+}
+
static void EmitRuntimeFunction(MCStreamer &streamer,
const MCWin64EHUnwindInfo *info) {
MCContext &context = streamer.getContext();
streamer.EmitValueToAlignment(4);
- streamer.EmitValue(MCSymbolRefExpr::Create(info->Begin, context), 4);
- streamer.EmitValue(MCSymbolRefExpr::Create(info->End, context), 4);
- streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol, context), 4);
+ EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
+ EmitSymbolRefWithOfs(streamer, info->Function, info->End);
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol,
+ MCSymbolRefExpr::VK_COFF_IMGREL32,
+ context), 4);
}
static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
@@ -145,11 +159,11 @@ static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
MCContext &context = streamer.getContext();
streamer.EmitValueToAlignment(4);
- // Upper 3 bits are the version number (currently 1).
- uint8_t flags = 0x01;
info->Symbol = context.CreateTempSymbol();
streamer.EmitLabel(info->Symbol);
+ // Upper 3 bits are the version number (currently 1).
+ uint8_t flags = 0x01;
if (info->ChainedParent)
flags |= Win64EH::UNW_ChainInfo << 3;
else {
@@ -185,20 +199,26 @@ static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
EmitUnwindCode(streamer, info->Begin, inst);
}
+ // For alignment purposes, the instruction array will always have an even
+ // number of entries, with the final entry potentially unused (in which case
+ // the array will be one longer than indicated by the count of unwind codes
+ // field).
+ if (numCodes & 1) {
+ streamer.EmitIntValue(0, 2);
+ }
+
if (flags & (Win64EH::UNW_ChainInfo << 3))
EmitRuntimeFunction(streamer, info->ChainedParent);
else if (flags &
((Win64EH::UNW_TerminateHandler|Win64EH::UNW_ExceptionHandler) << 3))
- streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler, context),
- 4);
- else if (numCodes < 2) {
+ streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler,
+ MCSymbolRefExpr::VK_COFF_IMGREL32,
+ context), 4);
+ else if (numCodes == 0) {
// The minimum size of an UNWIND_INFO struct is 8 bytes. If we're not
// a chained unwind info, if there is no handler, and if there are fewer
// than 2 slots used in the unwind code array, we have to pad to 8 bytes.
- if (numCodes == 1)
- streamer.EmitIntValue(0, 2);
- else
- streamer.EmitIntValue(0, 4);
+ streamer.EmitIntValue(0, 4);
}
}
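
The comments above describe two padding rules for the Win64 unwind info: the unwind-code slot array is kept at an even length, and a record with no codes, no chained info, and no handler still gets 4 extra bytes so the UNWIND_INFO struct reaches its 8-byte minimum. A tiny sketch of just that arithmetic:

```cpp
#include <cstdio>

static unsigned unwindPaddingBytes(unsigned NumCodes, bool HasHandlerOrChain) {
  unsigned Pad = 0;
  if (NumCodes & 1)
    Pad += 2;                     // keep the 2-byte slot array even-sized
  if (NumCodes == 0 && !HasHandlerOrChain)
    Pad += 4;                     // pad the whole record up to 8 bytes
  return Pad;
}

int main() {
  std::printf("%u %u %u\n",
              unwindPaddingBytes(0, false),   // 4
              unwindPaddingBytes(1, false),   // 2
              unwindPaddingBytes(2, true));   // 0
  return 0;
}
```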
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a5ba3c3..8234aff 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -20,12 +20,11 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include <vector>
using namespace llvm;
-using namespace llvm::object;
void MachObjectWriter::reset() {
Relocations.clear();
@@ -128,7 +127,7 @@ void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
uint32_t Flags = 0;
if (SubsectionsViaSymbols)
- Flags |= macho::HF_SubsectionsViaSymbols;
+ Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
// struct mach_header (28 bytes) or
// struct mach_header_64 (32 bytes)
@@ -136,12 +135,12 @@ void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
uint64_t Start = OS.tell();
(void) Start;
- Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
+ Write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
Write32(TargetObjectWriter->getCPUType());
Write32(TargetObjectWriter->getCPUSubtype());
- Write32(macho::HFT_Object);
+ Write32(MachO::MH_OBJECT);
Write32(NumLoadCommands);
Write32(LoadCommandsSize);
Write32(Flags);
@@ -149,7 +148,7 @@ void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
Write32(0); // reserved
assert(OS.tell() - Start ==
- (is64Bit() ? macho::Header64Size : macho::Header32Size));
+ (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header)));
}
/// WriteSegmentLoadCommand - Write a segment load command.
@@ -167,12 +166,12 @@ void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
(void) Start;
unsigned SegmentLoadCommandSize =
- is64Bit() ? macho::SegmentLoadCommand64Size:
- macho::SegmentLoadCommand32Size;
- Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
+ is64Bit() ? sizeof(MachO::segment_command_64):
+ sizeof(MachO::segment_command);
+ Write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
Write32(SegmentLoadCommandSize +
- NumSections * (is64Bit() ? macho::Section64Size :
- macho::Section32Size));
+ NumSections * (is64Bit() ? sizeof(MachO::section_64) :
+ sizeof(MachO::section)));
WriteBytes("", 16);
if (is64Bit()) {
@@ -186,8 +185,10 @@ void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
Write32(SectionDataStartOffset); // file offset
Write32(SectionDataSize); // file size
}
- Write32(0x7); // maxprot
- Write32(0x7); // initprot
+ // maxprot
+ Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
+ // initprot
+ Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
Write32(NumSections);
Write32(0); // flags
@@ -240,8 +241,8 @@ void MachObjectWriter::WriteSection(const MCAssembler &Asm,
if (is64Bit())
Write32(0); // reserved3
- assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
- macho::Section32Size));
+ assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) :
+ sizeof(MachO::section)));
}
void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
@@ -253,14 +254,14 @@ void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
uint64_t Start = OS.tell();
(void) Start;
- Write32(macho::LCT_Symtab);
- Write32(macho::SymtabLoadCommandSize);
+ Write32(MachO::LC_SYMTAB);
+ Write32(sizeof(MachO::symtab_command));
Write32(SymbolOffset);
Write32(NumSymbols);
Write32(StringTableOffset);
Write32(StringTableSize);
- assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
+ assert(OS.tell() - Start == sizeof(MachO::symtab_command));
}
void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
@@ -276,8 +277,8 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
uint64_t Start = OS.tell();
(void) Start;
- Write32(macho::LCT_Dysymtab);
- Write32(macho::DysymtabLoadCommandSize);
+ Write32(MachO::LC_DYSYMTAB);
+ Write32(sizeof(MachO::dysymtab_command));
Write32(FirstLocalSymbol);
Write32(NumLocalSymbols);
Write32(FirstExternalSymbol);
@@ -297,7 +298,7 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
Write32(0); // locreloff
Write32(0); // nlocrel
- assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
+ assert(OS.tell() - Start == sizeof(MachO::dysymtab_command));
}
void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
@@ -312,20 +313,20 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
//
// FIXME: Are the prebound or indirect fields possible here?
if (Symbol.isUndefined())
- Type = macho::STT_Undefined;
+ Type = MachO::N_UNDF;
else if (Symbol.isAbsolute())
- Type = macho::STT_Absolute;
+ Type = MachO::N_ABS;
else
- Type = macho::STT_Section;
+ Type = MachO::N_SECT;
// FIXME: Set STAB bits.
if (Data.isPrivateExtern())
- Type |= macho::STF_PrivateExtern;
+ Type |= MachO::N_PEXT;
// Set external bit.
if (Data.isExternal() || Symbol.isUndefined())
- Type |= macho::STF_External;
+ Type |= MachO::N_EXT;
// Compute the symbol address.
if (Symbol.isDefined()) {
@@ -341,7 +342,8 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
if (Log2Size > 15)
report_fatal_error("invalid 'common' alignment '" +
- Twine(Align) + "'");
+ Twine(Align) + "' for '" + Symbol.getName() + "'",
+ false);
// FIXME: Keep this mask with the SymbolFlags enumeration.
Flags = (Flags & 0xF0FF) | (Log2Size << 8);
}
@@ -369,17 +371,17 @@ void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type,
(void) Start;
Write32(Type);
- Write32(macho::LinkeditLoadCommandSize);
+ Write32(sizeof(MachO::linkedit_data_command));
Write32(DataOffset);
Write32(DataSize);
- assert(OS.tell() - Start == macho::LinkeditLoadCommandSize);
+ assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command));
}
static unsigned ComputeLinkerOptionsLoadCommandSize(
const std::vector<std::string> &Options, bool is64Bit)
{
- unsigned Size = sizeof(macho::LinkerOptionsLoadCommand);
+ unsigned Size = sizeof(MachO::linker_options_command);
for (unsigned i = 0, e = Options.size(); i != e; ++i)
Size += Options[i].size() + 1;
return RoundUpToAlignment(Size, is64Bit ? 8 : 4);
@@ -392,10 +394,10 @@ void MachObjectWriter::WriteLinkerOptionsLoadCommand(
uint64_t Start = OS.tell();
(void) Start;
- Write32(macho::LCT_LinkerOptions);
+ Write32(MachO::LC_LINKER_OPTIONS);
Write32(Size);
Write32(Options.size());
- uint64_t BytesWritten = sizeof(macho::LinkerOptionsLoadCommand);
+ uint64_t BytesWritten = sizeof(MachO::linker_options_command);
for (unsigned i = 0, e = Options.size(); i != e; ++i) {
// Write each string, including the null byte.
const std::string &Option = Options[i];
@@ -428,6 +430,22 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
//
// FIXME: Revisit this when the dust settles.
+ // Report errors for use of .indirect_symbol not in a symbol pointer section
+ // or stub section.
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
+
+ if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS &&
+ Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+ Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) {
+ MCSymbol &Symbol = *it->Symbol;
+ report_fatal_error("indirect symbol '" + Symbol.getName() +
+ "' not in a symbol pointer or stub section");
+ }
+ }
+
// Bind non lazy symbol pointers first.
unsigned IndirectIndex = 0;
for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
@@ -723,14 +741,14 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// section headers) and the symbol table.
unsigned NumLoadCommands = 1;
uint64_t LoadCommandsSize = is64Bit() ?
- macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
- macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+ sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
+ sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
// Add the data-in-code load command size, if used.
unsigned NumDataRegions = Asm.getDataRegions().size();
if (NumDataRegions) {
++NumLoadCommands;
- LoadCommandsSize += macho::LinkeditLoadCommandSize;
+ LoadCommandsSize += sizeof(MachO::linkedit_data_command);
}
// Add the symbol table load command sizes, if used.
@@ -738,8 +756,8 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
UndefinedSymbolData.size();
if (NumSymbols) {
NumLoadCommands += 2;
- LoadCommandsSize += (macho::SymtabLoadCommandSize +
- macho::DysymtabLoadCommandSize);
+ LoadCommandsSize += (sizeof(MachO::symtab_command) +
+ sizeof(MachO::dysymtab_command));
}
// Add the linker option load commands sizes.
@@ -753,8 +771,8 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Compute the total size of the section data, as well as its file size and vm
// size.
- uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
- macho::Header32Size) + LoadCommandsSize;
+ uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
+ sizeof(MachO::mach_header)) + LoadCommandsSize;
uint64_t SectionDataSize = 0;
uint64_t SectionDataFileSize = 0;
uint64_t VMSize = 0;
@@ -791,11 +809,11 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
unsigned NumRelocs = Relocs.size();
uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
- RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+ RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
}
// Write the data-in-code load command, if used.
@@ -803,7 +821,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
if (NumDataRegions) {
uint64_t DataRegionsOffset = RelocTableEnd;
uint64_t DataRegionsSize = NumDataRegions * 8;
- WriteLinkeditLoadCommand(macho::LCT_DataInCode, DataRegionsOffset,
+ WriteLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
DataRegionsSize);
}
@@ -830,8 +848,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// The string table is written after symbol table.
uint64_t StringTableOffset =
- SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
- macho::Nlist32Size);
+ SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
+ sizeof(MachO::nlist_64) :
+ sizeof(MachO::nlist));
WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
StringTableOffset, StringTable.size());
@@ -864,10 +883,10 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
ie = Asm.end(); it != ie; ++it) {
// Write the section relocation entries, in reverse order to match 'as'
// (approximately, the exact algorithm is more complicated than this).
- std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- Write32(Relocs[e - i - 1].Word0);
- Write32(Relocs[e - i - 1].Word1);
+ Write32(Relocs[e - i - 1].r_word0);
+ Write32(Relocs[e - i - 1].r_word1);
}
}
@@ -906,9 +925,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// If this symbol is defined and internal, mark it as such.
if (it->Symbol->isDefined() &&
!Asm.getSymbolData(*it->Symbol).isExternal()) {
- uint32_t Flags = macho::ISF_Local;
+ uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
if (it->Symbol->isAbsolute())
- Flags |= macho::ISF_Absolute;
+ Flags |= MachO::INDIRECT_SYMBOL_ABS;
Write32(Flags);
continue;
}
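
The MachObjectWriter changes above replace hand-maintained size constants (macho::Header64Size, macho::SegmentLoadCommand64Size, and so on) with sizeof() over the MachO struct definitions. A standalone sketch of that sizing approach; the struct layouts here are illustrative stand-ins, not the headers LLVM actually uses:

```cpp
#include <cstdint>
#include <cstdio>

// Stand-in layouts with the same field widths as the 64-bit Mach-O structs.
struct segment_command_64 {
  uint32_t cmd, cmdsize; char segname[16];
  uint64_t vmaddr, vmsize, fileoff, filesize;
  uint32_t maxprot, initprot, nsects, flags;
};
struct section_64 {
  char sectname[16], segname[16];
  uint64_t addr, size;
  uint32_t offset, align, reloff, nreloc, flags,
           reserved1, reserved2, reserved3;
};

int main() {
  unsigned NumSections = 3;
  uint64_t LoadCommandsSize =
      sizeof(segment_command_64) + NumSections * sizeof(section_64);
  std::printf("load commands: %llu bytes\n",
              (unsigned long long)LoadCommandsSize); // 72 + 3*80 = 312
  return 0;
}
```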
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 7625abd..2fb91f2 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -121,13 +121,10 @@ void SubtargetFeatures::AddFeature(const StringRef String,
/// Find KV in array using binary search.
static const SubtargetFeatureKV *Find(StringRef S, const SubtargetFeatureKV *A,
size_t L) {
- // Make the lower bound element we're looking for
- SubtargetFeatureKV KV;
- KV.Key = S.data();
// Determine the end of the array
const SubtargetFeatureKV *Hi = A + L;
// Binary search the array
- const SubtargetFeatureKV *F = std::lower_bound(A, Hi, KV);
+ const SubtargetFeatureKV *F = std::lower_bound(A, Hi, S);
// If not found then return NULL
if (F == Hi || StringRef(F->Key) != S) return NULL;
// Return the found array item
@@ -353,8 +350,7 @@ void SubtargetFeatures::dump() const {
}
#endif
-/// getDefaultSubtargetFeatures - Return a string listing the features
-/// associated with the target triple.
+/// Adds the default features for the specified target triple.
///
/// FIXME: This is an inelegant way of specifying the features of a
/// subtarget. It would be better if we could encode this information
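
Find and getSchedModelForCPU now call std::lower_bound with the string key directly instead of building a temporary KV record, which relies on the table's element type being comparable against the key. A minimal sketch of that heterogeneous binary search in standard C++:

```cpp
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <string>

struct KV { const char *Key; int Value; }; // stand-in for SubtargetFeatureKV

// Comparison used by std::lower_bound(A, A + N, Key).
static bool operator<(const KV &Entry, const std::string &Key) {
  return std::strcmp(Entry.Key, Key.c_str()) < 0;
}

static const KV *find(const std::string &Key, const KV *A, size_t N) {
  const KV *End = A + N;
  const KV *F = std::lower_bound(A, End, Key); // table must be sorted by Key
  if (F == End || Key != F->Key)
    return nullptr;
  return F;
}

int main() {
  static const KV Table[] = {{"cortex-a9", 1}, {"generic", 2}, {"swift", 3}};
  const KV *Hit = find("generic", Table, 3);
  std::printf("%s\n", Hit ? "found" : "missing"); // found
  return 0;
}
```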
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 4e26934..d9ca86d 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -138,7 +138,7 @@ public:
symbol_map SymbolMap;
WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
- ~WinCOFFObjectWriter();
+ virtual ~WinCOFFObjectWriter();
COFFSymbol *createSymbol(StringRef Name);
COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
@@ -148,13 +148,12 @@ public:
object_t *createCOFFEntity(StringRef Name, list_t &List);
void DefineSection(MCSectionData const &SectionData);
- void DefineSymbol(MCSymbolData const &SymbolData,
- MCAssembler &Assembler);
+ void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler,
+ const MCAsmLayout &Layout);
void MakeSymbolReal(COFFSymbol &S, size_t Index);
void MakeSectionReal(COFFSection &S, size_t Number);
- bool ExportSection(COFFSection const *S);
bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
bool IsPhysicalSection(COFFSection *S);
@@ -190,17 +189,6 @@ static inline void write_uint32_le(void *Data, uint32_t const &Value) {
Ptr[3] = (Value & 0xFF000000) >> 24;
}
-static inline void write_uint16_le(void *Data, uint16_t const &Value) {
- uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
- Ptr[0] = (Value & 0x00FF) >> 0;
- Ptr[1] = (Value & 0xFF00) >> 8;
-}
-
-static inline void write_uint8_le(void *Data, uint8_t const &Value) {
- uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
- Ptr[0] = (Value & 0xFF) >> 0;
-}
-
//------------------------------------------------------------------------------
// Symbol class implementation
@@ -411,7 +399,8 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
/// This function takes a section data object from the assembler
/// and creates the associated COFF symbol staging object.
void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
- MCAssembler &Assembler) {
+ MCAssembler &Assembler,
+ const MCAsmLayout &Layout) {
MCSymbol const &Symbol = SymbolData.getSymbol();
COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
SymbolMap[&Symbol] = coff_symbol;
@@ -452,6 +441,12 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
const MCSymbolData &ResSymData =
Assembler.getSymbolData(Symbol.AliasedSymbol());
+ if (Symbol.isVariable()) {
+ int64_t Addr;
+ if (Symbol.getVariableValue()->EvaluateAsAbsolute(Addr, Layout))
+ coff_symbol->Data.Value = Addr;
+ }
+
coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0;
coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16;
@@ -463,7 +458,9 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
}
- if (ResSymData.Fragment != NULL)
+ if (Symbol.isAbsolute() || Symbol.AliasedSymbol().isVariable())
+ coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
+ else if (ResSymData.Fragment != NULL)
coff_symbol->Section =
SectionMap[&ResSymData.Fragment->getParent()->getSection()];
@@ -508,10 +505,6 @@ void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) {
S.Index = Index;
}
-bool WinCOFFObjectWriter::ExportSection(COFFSection const *S) {
- return !S->MCData->getFragmentList().empty();
-}
-
bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
MCAssembler &Asm) {
// This doesn't seem to be right. Strings referred to from the .data section
@@ -625,9 +618,10 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
DefineSection(*i);
for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
- e = Asm.symbol_end(); i != e; i++) {
+ e = Asm.symbol_end();
+ i != e; i++) {
if (ExportSymbol(*i, Asm)) {
- DefineSymbol(*i, Asm);
+ DefineSymbol(*i, Asm, Layout);
}
}
}
@@ -921,6 +915,9 @@ MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) :
Machine(Machine_) {
}
+// Pin the vtable to this file.
+void MCWinCOFFObjectTargetWriter::anchor() {}
+
//------------------------------------------------------------------------------
// WinCOFFObjectWriter factory function
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 04bfeb4..5b5aad7 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -55,7 +55,7 @@ public:
virtual void EmitDebugLabel(MCSymbol *Symbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
virtual void EmitThumbFunc(MCSymbol *Func);
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol);
virtual void EmitCOFFSymbolStorageClass(int StorageClass);
@@ -72,13 +72,10 @@ public:
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment);
virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitIdent(StringRef IdentString);
virtual void EmitWin64EHHandlerData();
virtual void FinishImpl();
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_WinCOFFStreamer;
- }
-
private:
virtual void EmitInstToData(const MCInst &Inst) {
MCDataFragment *DF = getOrCreateDataFragment();
@@ -134,8 +131,7 @@ private:
WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
MCCodeEmitter &CE, raw_ostream &OS)
- : MCObjectStreamer(SK_WinCOFFStreamer, Context, MAB, OS, &CE),
- CurSymbol(NULL) {}
+ : MCObjectStreamer(Context, 0, MAB, OS, &CE), CurSymbol(NULL) {}
void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment, bool External) {
@@ -155,7 +151,8 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
const MCSection *Section = MCStreamer::getContext().getCOFFSection(
- SectionName, Characteristics, SectionKind::getBSS(), Selection);
+ SectionName, Characteristics, SectionKind::getBSS(), Symbol->getName(),
+ Selection);
MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
@@ -164,7 +161,7 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
SymbolData.setExternal(External);
- Symbol->setSection(*Section);
+ AssignSection(Symbol, Section);
if (ByteAlignment != 1)
new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData);
@@ -201,7 +198,7 @@ void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
llvm_unreachable("not implemented");
}
-void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+bool WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
assert(Symbol && "Symbol must be non-null!");
assert((Symbol->isInSection()
@@ -221,8 +218,10 @@ void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
break;
default:
- llvm_unreachable("unsupported attribute");
+ return false;
}
+
+ return true;
}
void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
@@ -309,6 +308,11 @@ void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
// info will be a much large effort.
}
+// TODO: Implement this if you want to emit .comment section in COFF obj files.
+void WinCOFFStreamer::EmitIdent(StringRef IdentString) {
+ llvm_unreachable("unsupported directive");
+}
+
void WinCOFFStreamer::EmitWin64EHHandlerData() {
MCStreamer::EmitWin64EHHandlerData();
@@ -318,6 +322,7 @@ void WinCOFFStreamer::EmitWin64EHHandlerData() {
}
void WinCOFFStreamer::FinishImpl() {
+ EmitFrames(NULL, true);
EmitW64Tables();
MCObjectStreamer::FinishImpl();
}
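Aside: EmitSymbolAttribute changing from void to bool above lets a streamer
report symbol attributes it cannot encode instead of hitting llvm_unreachable,
so callers can recover or emit a proper diagnostic. A minimal standalone sketch
of the same convention (the enum, function, and messages below are made up for
illustration and are not the MC API):

#include <cstdio>

enum Attr { AttrGlobal, AttrWeak, AttrThumbFunc };

// Returns false for attributes this (imaginary) object format cannot encode.
static bool emitAttribute(Attr A) {
  switch (A) {
  case AttrGlobal:
  case AttrWeak:
    return true;   // supported: a real streamer would record the attribute here
  default:
    return false;  // unsupported: let the caller decide how to report it
  }
}

int main() {
  if (!emitAttribute(AttrThumbFunc))
    std::fprintf(stderr, "error: symbol attribute not supported\n");
  return 0;
}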
diff --git a/lib/Makefile b/lib/Makefile
index 0a4435e..2ed0636 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -10,9 +10,8 @@ LEVEL = ..
include $(LEVEL)/Makefile.config
-PARALLEL_DIRS := IR AsmParser Bitcode Analysis Transforms CodeGen \
- Target ExecutionEngine Linker MC Object Option DebugInfo \
- IRReader
+PARALLEL_DIRS := IR AsmParser Bitcode Analysis Transforms CodeGen Target \
+ ExecutionEngine Linker LTO MC Object Option DebugInfo \
+ IRReader
include $(LEVEL)/Makefile.common
-
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index fd9d3b4..de57b4c 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -91,6 +91,7 @@ error_code object::createBinary(MemoryBuffer *Source,
return object_error::success;
}
case sys::fs::file_magic::coff_object:
+ case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable: {
OwningPtr<Binary> ret(
ObjectFile::createCOFFObjectFile(scopedSource.take()));
@@ -100,7 +101,8 @@ error_code object::createBinary(MemoryBuffer *Source,
return object_error::success;
}
case sys::fs::file_magic::unknown:
- case sys::fs::file_magic::bitcode: {
+ case sys::fs::file_magic::bitcode:
+ case sys::fs::file_magic::windows_resource: {
// Unrecognized object file format.
return object_error::invalid_file_type;
}
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 2c2cc8e..1f07cbb 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMObject
Binary.cpp
COFFObjectFile.cpp
COFFYAML.cpp
+ ELF.cpp
ELFObjectFile.cpp
ELFYAML.cpp
Error.cpp
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index cb029f9..3434e70 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -16,6 +16,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
#include <ctype.h>
@@ -111,10 +114,8 @@ error_code COFFObjectFile::getSymbolFileOffset(DataRefImpl Symb,
const coff_section *Section = NULL;
if (error_code ec = getSection(symb->SectionNumber, Section))
return ec;
- char Type;
- if (error_code ec = getSymbolNMTypeChar(Symb, Type))
- return ec;
- if (Type == 'U' || Type == 'w')
+
+ if (symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
Result = UnknownAddressOrSize;
else if (Section)
Result = Section->PointerToRawData + symb->Value;
@@ -129,10 +130,8 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
const coff_section *Section = NULL;
if (error_code ec = getSection(symb->SectionNumber, Section))
return ec;
- char Type;
- if (error_code ec = getSymbolNMTypeChar(Symb, Type))
- return ec;
- if (Type == 'U' || Type == 'w')
+
+ if (symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
Result = UnknownAddressOrSize;
else if (Section)
Result = Section->VirtualAddress + symb->Value;
@@ -152,12 +151,16 @@ error_code COFFObjectFile::getSymbolType(DataRefImpl Symb,
if (symb->getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
Result = SymbolRef::ST_Function;
} else {
- char Type;
- if (error_code ec = getSymbolNMTypeChar(Symb, Type))
- return ec;
- if (Type == 'r' || Type == 'R') {
- Result = SymbolRef::ST_Data;
+ uint32_t Characteristics = 0;
+ if (symb->SectionNumber > 0) {
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ Characteristics = Section->Characteristics;
}
+ if (Characteristics & COFF::IMAGE_SCN_MEM_READ &&
+ ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only.
+ Result = SymbolRef::ST_Data;
}
}
return object_error::success;
@@ -196,10 +199,8 @@ error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb,
const coff_section *Section = NULL;
if (error_code ec = getSection(symb->SectionNumber, Section))
return ec;
- char Type;
- if (error_code ec = getSymbolNMTypeChar(Symb, Type))
- return ec;
- if (Type == 'U' || Type == 'w')
+
+ if (symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
Result = UnknownAddressOrSize;
else if (Section)
Result = Section->SizeOfRawData - symb->Value;
@@ -208,74 +209,6 @@ error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb,
return object_error::success;
}
-error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
- char &Result) const {
- const coff_symbol *symb = toSymb(Symb);
- StringRef name;
- if (error_code ec = getSymbolName(Symb, name))
- return ec;
- char ret = StringSwitch<char>(name)
- .StartsWith(".debug", 'N')
- .StartsWith(".sxdata", 'N')
- .Default('?');
-
- if (ret != '?') {
- Result = ret;
- return object_error::success;
- }
-
- uint32_t Characteristics = 0;
- if (symb->SectionNumber > 0) {
- const coff_section *Section = NULL;
- if (error_code ec = getSection(symb->SectionNumber, Section))
- return ec;
- Characteristics = Section->Characteristics;
- }
-
- switch (symb->SectionNumber) {
- case COFF::IMAGE_SYM_UNDEFINED:
- // Check storage classes.
- if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
- Result = 'w';
- return object_error::success; // Don't do ::toupper.
- } else if (symb->Value != 0) // Check for common symbols.
- ret = 'c';
- else
- ret = 'u';
- break;
- case COFF::IMAGE_SYM_ABSOLUTE:
- ret = 'a';
- break;
- case COFF::IMAGE_SYM_DEBUG:
- ret = 'n';
- break;
- default:
- // Check section type.
- if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
- ret = 't';
- else if ( Characteristics & COFF::IMAGE_SCN_MEM_READ
- && ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only.
- ret = 'r';
- else if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
- ret = 'd';
- else if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
- ret = 'b';
- else if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
- ret = 'i';
-
- // Check for section symbol.
- else if ( symb->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC
- && symb->Value == 0)
- ret = 's';
- }
-
- if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
- ret = ::toupper(static_cast<unsigned char>(ret));
-
- Result = ret;
- return object_error::success;
-}
-
error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb,
section_iterator &Result) const {
const coff_symbol *symb = toSymb(Symb);
@@ -406,7 +339,7 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
return object_error::success;
}
-relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
+relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Sec) const {
const coff_section *sec = toSec(Sec);
DataRefImpl ret;
if (sec->NumberOfRelocations == 0)
@@ -417,7 +350,7 @@ relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
return relocation_iterator(RelocationRef(ret, this));
}
-relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Sec) const {
const coff_section *sec = toSec(Sec);
DataRefImpl ret;
if (sec->NumberOfRelocations == 0)
@@ -431,6 +364,94 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
return relocation_iterator(RelocationRef(ret, this));
}
+// Initialize the pointer to the symbol table.
+error_code COFFObjectFile::initSymbolTablePtr() {
+ if (error_code ec = getObject(
+ SymbolTable, Data, base() + COFFHeader->PointerToSymbolTable,
+ COFFHeader->NumberOfSymbols * sizeof(coff_symbol)))
+ return ec;
+
+  // Find the string table. The first four bytes of the string table contain
+  // the total size of the string table, including the size field itself. If
+  // the string table is empty, the value of the first four bytes is 4.
+ const uint8_t *StringTableAddr =
+ base() + COFFHeader->PointerToSymbolTable +
+ COFFHeader->NumberOfSymbols * sizeof(coff_symbol);
+ const ulittle32_t *StringTableSizePtr;
+ if (error_code ec = getObject(StringTableSizePtr, Data, StringTableAddr))
+ return ec;
+ StringTableSize = *StringTableSizePtr;
+ if (error_code ec =
+ getObject(StringTable, Data, StringTableAddr, StringTableSize))
+ return ec;
+
+  // Check that the string table is null-terminated if it has any data in it.
+ if (StringTableSize < 4 ||
+ (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0))
+ return object_error::parse_failed;
+ return object_error::success;
+}
+
+// Returns the file offset for the given RVA.
+error_code COFFObjectFile::getRvaPtr(uint32_t Rva, uintptr_t &Res) const {
+ error_code ec;
+ for (section_iterator i = begin_sections(), e = end_sections(); i != e;
+ i.increment(ec)) {
+ if (ec)
+ return ec;
+ const coff_section *Section = getCOFFSection(i);
+ uint32_t SectionStart = Section->VirtualAddress;
+ uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize;
+ if (SectionStart <= Rva && Rva < SectionEnd) {
+ uint32_t Offset = Rva - SectionStart;
+ Res = uintptr_t(base()) + Section->PointerToRawData + Offset;
+ return object_error::success;
+ }
+ }
+ return object_error::parse_failed;
+}
+
+// Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name
+// table entry.
+error_code COFFObjectFile::
+getHintName(uint32_t Rva, uint16_t &Hint, StringRef &Name) const {
+ uintptr_t IntPtr = 0;
+ if (error_code ec = getRvaPtr(Rva, IntPtr))
+ return ec;
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(IntPtr);
+ Hint = *reinterpret_cast<const ulittle16_t *>(Ptr);
+ Name = StringRef(reinterpret_cast<const char *>(Ptr + 2));
+ return object_error::success;
+}
+
+// Find the import table.
+error_code COFFObjectFile::initImportTablePtr() {
+ // First, we get the RVA of the import table. If the file lacks a pointer to
+ // the import table, do nothing.
+ const data_directory *DataEntry;
+ if (getDataDirectory(COFF::IMPORT_TABLE, DataEntry))
+ return object_error::success;
+
+  // Do nothing if the pointer to the import table is NULL.
+ if (DataEntry->RelativeVirtualAddress == 0)
+ return object_error::success;
+
+ uint32_t ImportTableRva = DataEntry->RelativeVirtualAddress;
+ NumberOfImportDirectory = DataEntry->Size /
+ sizeof(import_directory_table_entry);
+
+ // Find the section that contains the RVA. This is needed because the RVA is
+  // the import table's memory address, which is different from its file offset.
+ uintptr_t IntPtr = 0;
+ if (error_code ec = getRvaPtr(ImportTableRva, IntPtr))
+ return ec;
+ ImportDirectory = reinterpret_cast<
+ const import_directory_table_entry *>(IntPtr);
+
+ // It's an error if there's no section containing the Import Table RVA.
+ return object_error::parse_failed;
+}
+
COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
: ObjectFile(Binary::ID_COFF, Object)
, COFFHeader(0)
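Aside: getRvaPtr above maps a relative virtual address back to a location in
the file by finding the section whose virtual address range contains it, then
adding the offset within that section to the section's raw-data file position.
A minimal standalone sketch of the same arithmetic (the struct and function
names are hypothetical and only mirror the fields used in the patch):

#include <cstddef>
#include <cstdint>
#include <vector>

struct SectionInfo {
  uint32_t VirtualAddress;   // RVA at which the section is mapped
  uint32_t VirtualSize;      // size of the section once mapped
  uint32_t PointerToRawData; // file offset of the section's contents
};

// Returns true and sets FileOffset when some section's range
// [VirtualAddress, VirtualAddress + VirtualSize) contains Rva.
static bool rvaToFileOffset(const std::vector<SectionInfo> &Sections,
                            uint32_t Rva, uint32_t &FileOffset) {
  for (std::size_t I = 0; I != Sections.size(); ++I) {
    const SectionInfo &S = Sections[I];
    if (S.VirtualAddress <= Rva && Rva < S.VirtualAddress + S.VirtualSize) {
      FileOffset = S.PointerToRawData + (Rva - S.VirtualAddress);
      return true;
    }
  }
  return false; // no section contains the RVA -- the parse_failed case above
}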
@@ -439,7 +460,9 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
, SectionTable(0)
, SymbolTable(0)
, StringTable(0)
- , StringTableSize(0) {
+ , StringTableSize(0)
+ , ImportDirectory(0)
+ , NumberOfImportDirectory(0) {
// Check that we at least have enough room for a header.
if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
@@ -486,49 +509,33 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
CurPtr += COFFHeader->SizeOfOptionalHeader;
}
- if ((ec = getObject(SectionTable, Data, base() + CurPtr,
- COFFHeader->NumberOfSections * sizeof(coff_section))))
- return;
-
- if (COFFHeader->PointerToSymbolTable != 0) {
- if ((ec = getObject(SymbolTable, Data,
- base() + COFFHeader->PointerToSymbolTable,
- COFFHeader->NumberOfSymbols * sizeof(coff_symbol))))
+ if (!COFFHeader->isImportLibrary())
+ if ((ec = getObject(SectionTable, Data, base() + CurPtr,
+ COFFHeader->NumberOfSections * sizeof(coff_section))))
return;
- // Find string table. The first four byte of the string table contains the
- // total size of the string table, including the size field itself. If the
- // string table is empty, the value of the first four byte would be 4.
- const uint8_t *StringTableAddr = base() + COFFHeader->PointerToSymbolTable
- + COFFHeader->NumberOfSymbols * sizeof(coff_symbol);
- const ulittle32_t *StringTableSizePtr;
- if ((ec = getObject(StringTableSizePtr, Data, StringTableAddr)))
- return;
- StringTableSize = *StringTableSizePtr;
- if ((ec = getObject(StringTable, Data, StringTableAddr, StringTableSize)))
+ // Initialize the pointer to the symbol table.
+ if (COFFHeader->PointerToSymbolTable != 0)
+ if ((ec = initSymbolTablePtr()))
return;
- // Check that the string table is null terminated if has any in it.
- if (StringTableSize < 4
- || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
- ec = object_error::parse_failed;
- return;
- }
- }
+ // Initialize the pointer to the beginning of the import table.
+ if ((ec = initImportTablePtr()))
+ return;
ec = object_error::success;
}
symbol_iterator COFFObjectFile::begin_symbols() const {
DataRefImpl ret;
- ret.p = reinterpret_cast<intptr_t>(SymbolTable);
+ ret.p = reinterpret_cast<uintptr_t>(SymbolTable);
return symbol_iterator(SymbolRef(ret, this));
}
symbol_iterator COFFObjectFile::end_symbols() const {
// The symbol table ends where the string table begins.
DataRefImpl ret;
- ret.p = reinterpret_cast<intptr_t>(StringTable);
+ ret.p = reinterpret_cast<uintptr_t>(StringTable);
return symbol_iterator(SymbolRef(ret, this));
}
@@ -557,16 +564,34 @@ StringRef COFFObjectFile::getLoadName() const {
return "";
}
+import_directory_iterator COFFObjectFile::import_directory_begin() const {
+ DataRefImpl Imp;
+ Imp.p = reinterpret_cast<uintptr_t>(ImportDirectory);
+ return import_directory_iterator(ImportDirectoryEntryRef(Imp, this));
+}
+
+import_directory_iterator COFFObjectFile::import_directory_end() const {
+ DataRefImpl Imp;
+ if (ImportDirectory) {
+ Imp.p = reinterpret_cast<uintptr_t>(
+ ImportDirectory + (NumberOfImportDirectory - 1));
+ } else {
+ Imp.p = 0;
+ }
+ return import_directory_iterator(ImportDirectoryEntryRef(Imp, this));
+}
section_iterator COFFObjectFile::begin_sections() const {
DataRefImpl ret;
- ret.p = reinterpret_cast<intptr_t>(SectionTable);
+ ret.p = reinterpret_cast<uintptr_t>(SectionTable);
return section_iterator(SectionRef(ret, this));
}
section_iterator COFFObjectFile::end_sections() const {
DataRefImpl ret;
- ret.p = reinterpret_cast<intptr_t>(SectionTable + COFFHeader->NumberOfSections);
+ int numSections = COFFHeader->isImportLibrary()
+ ? 0 : COFFHeader->NumberOfSections;
+ ret.p = reinterpret_cast<uintptr_t>(SectionTable + numSections);
return section_iterator(SectionRef(ret, this));
}
@@ -678,7 +703,7 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol,
ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData(
const coff_symbol *symbol) const {
const uint8_t *aux = NULL;
-
+
if ( symbol->NumberOfAuxSymbols > 0 ) {
// AUX data comes immediately after the symbol in COFF
aux = reinterpret_cast<const uint8_t *>(symbol + 1);
@@ -779,7 +804,6 @@ const coff_relocation *COFFObjectFile::getCOFFRelocation(
return toRel(It->getRawDataRefImpl());
}
-
#define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(enum) \
case COFF::enum: res = #enum; break;
@@ -860,6 +884,52 @@ error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData,
report_fatal_error("getLibraryPath not implemented in COFFObjectFile");
}
+bool ImportDirectoryEntryRef::
+operator==(const ImportDirectoryEntryRef &Other) const {
+ return ImportDirectoryPimpl == Other.ImportDirectoryPimpl;
+}
+
+static const import_directory_table_entry *toImportEntry(DataRefImpl Imp) {
+ return reinterpret_cast<const import_directory_table_entry *>(Imp.p);
+}
+
+error_code
+ImportDirectoryEntryRef::getNext(ImportDirectoryEntryRef &Result) const {
+ const import_directory_table_entry *Dir = toImportEntry(ImportDirectoryPimpl);
+ Dir += 1;
+ DataRefImpl Next;
+ Next.p = reinterpret_cast<uintptr_t>(Dir);
+ Result = ImportDirectoryEntryRef(Next, OwningObject);
+ return object_error::success;
+}
+
+error_code ImportDirectoryEntryRef::
+getImportTableEntry(const import_directory_table_entry *&Result) const {
+ Result = toImportEntry(ImportDirectoryPimpl);
+ return object_error::success;
+}
+
+error_code ImportDirectoryEntryRef::getName(StringRef &Result) const {
+ const import_directory_table_entry *Dir = toImportEntry(ImportDirectoryPimpl);
+ uintptr_t IntPtr = 0;
+ if (error_code ec = OwningObject->getRvaPtr(Dir->NameRVA, IntPtr))
+ return ec;
+ const char *Ptr = reinterpret_cast<const char *>(IntPtr);
+ Result = StringRef(Ptr);
+ return object_error::success;
+}
+
+error_code ImportDirectoryEntryRef::getImportLookupEntry(
+ const import_lookup_table_entry32 *&Result) const {
+ const import_directory_table_entry *Dir = toImportEntry(ImportDirectoryPimpl);
+ uintptr_t IntPtr = 0;
+ if (error_code ec = OwningObject->getRvaPtr(
+ Dir->ImportLookupTableRVA, IntPtr))
+ return ec;
+ Result = reinterpret_cast<const import_lookup_table_entry32 *>(IntPtr);
+ return object_error::success;
+}
+
namespace llvm {
ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
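Aside: getHintName above decodes a PE Hint/Name table entry, which is a 16-bit
hint stored in little-endian byte order immediately followed by a NUL-terminated
import name. A minimal standalone sketch of that layout (the struct and function
names are made up; only the byte layout matches the code in the patch):

#include <cstdint>
#include <string>

struct HintName {
  uint16_t Hint;    // hint the loader uses to speed up the export-name lookup
  std::string Name; // imported symbol name
};

// Entry must point at a Hint/Name table entry that has already been located,
// e.g. by translating its RVA to a pointer into the mapped file.
static HintName decodeHintName(const uint8_t *Entry) {
  HintName Result;
  // First two bytes: the hint, little-endian.
  Result.Hint = static_cast<uint16_t>(Entry[0] | (Entry[1] << 8));
  // The name starts at offset 2 and runs to the terminating NUL.
  Result.Name = reinterpret_cast<const char *>(Entry + 2);
  return Result;
}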
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
new file mode 100644
index 0000000..7c80d41
--- /dev/null
+++ b/lib/Object/ELF.cpp
@@ -0,0 +1,714 @@
+//===- ELF.cpp - ELF object file implementation -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ELF.h"
+
+namespace llvm {
+namespace object {
+
+#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \
+ case ELF::enum: \
+ return #enum; \
+
+StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
+ switch (Machine) {
+ case ELF::EM_X86_64:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32S);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPMOD64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSGD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSLD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTTPOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPLT64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLTOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_IRELATIVE);
+ default:
+ break;
+ }
+ break;
+ case ELF::EM_386:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTPC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32PLT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTIE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_PUSH);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_POP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_PUSH);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_POP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDO_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPMOD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE);
+ default:
+ break;
+ }
+ break;
+ case ELF::EM_MIPS:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_26);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LITERAL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT5);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT6);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_DISP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_OFST);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SUB);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_A);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_B);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_DELETE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHER);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHEST);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SCN_DISP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_ADD_IMMEDIATE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PJUMP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_RELGOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JALR);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_LDM);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GOTTPREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_26_S1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_PC16_S1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_CALL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_DISP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_OFST);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_DTPREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_DTPREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NUM);
+ default:
+ break;
+ }
+ break;
+ case ELF::EM_AARCH64:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G3);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD_PREL_LO19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_LO21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_PG_HI21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADD_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST8_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TSTBR14);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CONDBR19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_JUMP26);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CALL26);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST16_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST32_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST64_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST128_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_GOT_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD64_GOT_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_HI12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_HI12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADR_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL);
+ default:
+ break;
+ }
+ break;
+ case ELF::EM_ARM:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PC24);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_REL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_ABS5);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_SBREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_PC8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BREL_ADJ);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_SWI8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_XPC25);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_XPC22);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DTPMOD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DTPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_TPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BASE_PREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_BREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_JUMP24);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP24);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BASE_ABS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_7_0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_15_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_23_15);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SBREL_11_0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SBREL_19_12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SBREL_27_20_CK);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TARGET1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_SBREL31);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_V4BX);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TARGET2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PREL31);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_ABS_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_ABS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_PREL_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_PREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_ABS_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_ABS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_PREL_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_PREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP6);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_ALU_PREL_11_0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_PC12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS32_NOI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_REL32_NOI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_BREL_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_BREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_BREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_BREL_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_BREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_BREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_GOTDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DESCSEQ);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PLT32_ABS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_ABS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_PREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_BREL12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTOFF12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTRELAX);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GNU_VTENTRY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GNU_VTINHERIT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP11);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_GD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDM32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDO32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_IE32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LE32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LE12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_IE12GP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_3);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_4);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_5);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_6);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_7);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_9);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_10);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_11);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_13);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_14);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_15);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ME_TOO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ32);
+ default:
+ break;
+ }
+ break;
+ case ELF::EM_HEXAGON:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B22_PCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B15_PCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B7_PCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_3);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_HL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B13_PCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B9_PCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B32_PCREL_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B22_PCREL_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B15_PCREL_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B13_PCREL_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B9_PCREL_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B7_PCREL_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_16_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_12_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_11_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_10_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_9_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_8_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_7_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32_PCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_JMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_PLT_B22_PCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPMOD_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_PLT_B22_PCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_6_PCREL_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_32_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_16_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_11_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_32_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_16_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_11_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_32_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_16_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_11_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_32_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_16_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_11_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_32_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_16_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_32_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_16_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_11_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_32_6_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_16_X);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_11_X);
+ default:
+ break;
+ }
+ break;
+ case ELF::EM_PPC:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR24);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14_BRTAKEN);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14_BRNTAKEN);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL24);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14_BRTAKEN);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14_BRNTAKEN);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TLS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPMOD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSGD16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSGD16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSGD16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSGD16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSLD16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSLD16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSLD16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSLD16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TPREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TPREL16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TPREL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TPREL16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_DTPREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_DTPREL16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_DTPREL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_DTPREL16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TLSGD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TLSLD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL16_HA);
+ default:
+ break;
+ }
+ break;
+ case ELF::EM_PPC64:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR24);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR14);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR14_BRTAKEN);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR14_BRNTAKEN);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL24);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL14);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL14_BRTAKEN);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL14_BRNTAKEN);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHER);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHERA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHEST);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHESTA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_LO_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_LO_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_LO_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPMOD64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_LO_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_DTPREL16_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_DTPREL16_LO_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_DTPREL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_DTPREL16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_LO_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HIGHER);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HIGHERA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HIGHEST);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HIGHESTA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_LO_DS);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HIGHER);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HIGHERA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HIGHEST);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HIGHESTA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLSGD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLSLD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL16_LO);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL16_HI);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL16_HA);
+ default:
+ break;
+ }
+ break;
+ case ELF::EM_S390:
+ switch (Type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_JMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC16DBL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT16DBL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC32DBL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT32DBL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPCDBL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTENT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLTENT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LOAD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GDCALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDCALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GD64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDM32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDM64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IE32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IE64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IEENT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LE32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LE64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDO32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDO64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_DTPMOD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_DTPOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_TPOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_20);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT20);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT20);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE20);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_IRELATIVE);
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return "Unknown";
+}
+
+#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME
+
+} // end namespace object
+} // end namespace llvm
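Aside: the LLVM_ELF_SWITCH_RELOC_TYPE_NAME macro above works by stringizing the
enumerator it is given, so each case returns the exact spelling of the
relocation constant. A minimal standalone sketch of the same pattern (the enum
and its values are made up for illustration):

#include <cstdio>

enum ExampleReloc { R_EXAMPLE_NONE = 0, R_EXAMPLE_ABS32 = 1 };

#define RELOC_NAME_CASE(value) \
  case value:                  \
    return #value;

static const char *relocName(unsigned Type) {
  switch (Type) {
    RELOC_NAME_CASE(R_EXAMPLE_NONE)
    RELOC_NAME_CASE(R_EXAMPLE_ABS32)
  default:
    return "Unknown";
  }
}

int main() {
  std::printf("%s\n", relocName(1)); // prints "R_EXAMPLE_ABS32"
  return 0;
}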
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index a6c128e..20b7307 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/MathExtras.h"
#include <ctype.h>
namespace llvm {
-
using namespace object;
// Creates an in-memory object-file by default: createELFObjectFile(Buffer)
diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp
index e530d3d..2f35cf9 100644
--- a/lib/Object/ELFYAML.cpp
+++ b/lib/Object/ELFYAML.cpp
@@ -266,6 +266,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
#define BCase(X) IO.bitSetCase(Value, #X, ELF::X);
BCase(SHF_WRITE)
BCase(SHF_ALLOC)
+ BCase(SHF_EXCLUDE)
BCase(SHF_EXECINSTR)
BCase(SHF_MERGE)
BCase(SHF_STRINGS)
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 5d0399e..d2cb8bd 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -14,11 +14,11 @@
#include "llvm/Object/MachO.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <cstring>
#include <limits>
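Aside: the MachOObjectFile.cpp hunks that follow mostly rename the fields
touched by the SwapStruct specializations (the macho::* types become MachO::*
types carrying their on-disk field names); the byte-swapping pattern itself is
unchanged -- swap every multi-byte field in place when the file's endianness
differs from the host's. A minimal standalone sketch with a made-up header
struct:

#include <cstdint>

static void SwapValue(uint32_t &V) {
  V = ((V & 0x000000ffu) << 24) | ((V & 0x0000ff00u) << 8) |
      ((V & 0x00ff0000u) >> 8)  | ((V & 0xff000000u) >> 24);
}

struct example_header { // hypothetical struct, for illustration only
  uint32_t magic;
  uint32_t ncmds;
};

static void SwapStruct(example_header &H) {
  SwapValue(H.magic); // swap each multi-byte field, one per line
  SwapValue(H.ncmds);
}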
@@ -29,16 +29,16 @@ using namespace object;
namespace llvm {
namespace object {
-struct SymbolTableEntryBase {
- uint32_t StringIndex;
- uint8_t Type;
- uint8_t SectionIndex;
- uint16_t Flags;
+struct nlist_base {
+ uint32_t n_strx;
+ uint8_t n_type;
+ uint8_t n_sect;
+ uint16_t n_desc;
};
-struct SectionBase {
- char Name[16];
- char SegmentName[16];
+struct section_base {
+ char sectname[16];
+ char segname[16];
};
template<typename T>
@@ -50,167 +50,174 @@ template<typename T>
static void SwapStruct(T &Value);
template<>
-void SwapStruct(macho::RelocationEntry &H) {
- SwapValue(H.Word0);
- SwapValue(H.Word1);
+void SwapStruct(MachO::any_relocation_info &H) {
+ SwapValue(H.r_word0);
+ SwapValue(H.r_word1);
}
template<>
-void SwapStruct(macho::LoadCommand &L) {
- SwapValue(L.Type);
- SwapValue(L.Size);
+void SwapStruct(MachO::load_command &L) {
+ SwapValue(L.cmd);
+ SwapValue(L.cmdsize);
}
template<>
-void SwapStruct(SymbolTableEntryBase &S) {
- SwapValue(S.StringIndex);
- SwapValue(S.Flags);
+void SwapStruct(nlist_base &S) {
+ SwapValue(S.n_strx);
+ SwapValue(S.n_desc);
}
template<>
-void SwapStruct(macho::Section &S) {
- SwapValue(S.Address);
- SwapValue(S.Size);
- SwapValue(S.Offset);
- SwapValue(S.Align);
- SwapValue(S.RelocationTableOffset);
- SwapValue(S.NumRelocationTableEntries);
- SwapValue(S.Flags);
- SwapValue(S.Reserved1);
- SwapValue(S.Reserved2);
+void SwapStruct(MachO::section &S) {
+ SwapValue(S.addr);
+ SwapValue(S.size);
+ SwapValue(S.offset);
+ SwapValue(S.align);
+ SwapValue(S.reloff);
+ SwapValue(S.nreloc);
+ SwapValue(S.flags);
+ SwapValue(S.reserved1);
+ SwapValue(S.reserved2);
}
template<>
-void SwapStruct(macho::Section64 &S) {
- SwapValue(S.Address);
- SwapValue(S.Size);
- SwapValue(S.Offset);
- SwapValue(S.Align);
- SwapValue(S.RelocationTableOffset);
- SwapValue(S.NumRelocationTableEntries);
- SwapValue(S.Flags);
- SwapValue(S.Reserved1);
- SwapValue(S.Reserved2);
- SwapValue(S.Reserved3);
+void SwapStruct(MachO::section_64 &S) {
+ SwapValue(S.addr);
+ SwapValue(S.size);
+ SwapValue(S.offset);
+ SwapValue(S.align);
+ SwapValue(S.reloff);
+ SwapValue(S.nreloc);
+ SwapValue(S.flags);
+ SwapValue(S.reserved1);
+ SwapValue(S.reserved2);
+ SwapValue(S.reserved3);
}
template<>
-void SwapStruct(macho::SymbolTableEntry &S) {
- SwapValue(S.StringIndex);
- SwapValue(S.Flags);
- SwapValue(S.Value);
+void SwapStruct(MachO::nlist &S) {
+ SwapValue(S.n_strx);
+ SwapValue(S.n_desc);
+ SwapValue(S.n_value);
}
template<>
-void SwapStruct(macho::Symbol64TableEntry &S) {
- SwapValue(S.StringIndex);
- SwapValue(S.Flags);
- SwapValue(S.Value);
+void SwapStruct(MachO::nlist_64 &S) {
+ SwapValue(S.n_strx);
+ SwapValue(S.n_desc);
+ SwapValue(S.n_value);
}
template<>
-void SwapStruct(macho::Header &H) {
- SwapValue(H.Magic);
- SwapValue(H.CPUType);
- SwapValue(H.CPUSubtype);
- SwapValue(H.FileType);
- SwapValue(H.NumLoadCommands);
- SwapValue(H.SizeOfLoadCommands);
- SwapValue(H.Flags);
+void SwapStruct(MachO::mach_header &H) {
+ SwapValue(H.magic);
+ SwapValue(H.cputype);
+ SwapValue(H.cpusubtype);
+ SwapValue(H.filetype);
+ SwapValue(H.ncmds);
+ SwapValue(H.sizeofcmds);
+ SwapValue(H.flags);
}
template<>
-void SwapStruct(macho::Header64Ext &E) {
- SwapValue(E.Reserved);
+void SwapStruct(MachO::mach_header_64 &H) {
+ SwapValue(H.magic);
+ SwapValue(H.cputype);
+ SwapValue(H.cpusubtype);
+ SwapValue(H.filetype);
+ SwapValue(H.ncmds);
+ SwapValue(H.sizeofcmds);
+ SwapValue(H.flags);
+ SwapValue(H.reserved);
}
template<>
-void SwapStruct(macho::SymtabLoadCommand &C) {
- SwapValue(C.Type);
- SwapValue(C.Size);
- SwapValue(C.SymbolTableOffset);
- SwapValue(C.NumSymbolTableEntries);
- SwapValue(C.StringTableOffset);
- SwapValue(C.StringTableSize);
+void SwapStruct(MachO::symtab_command &C) {
+ SwapValue(C.cmd);
+ SwapValue(C.cmdsize);
+ SwapValue(C.symoff);
+ SwapValue(C.nsyms);
+ SwapValue(C.stroff);
+ SwapValue(C.strsize);
}
template<>
-void SwapStruct(macho::DysymtabLoadCommand &C) {
- SwapValue(C.Type);
- SwapValue(C.Size);
- SwapValue(C.LocalSymbolsIndex);
- SwapValue(C.NumLocalSymbols);
- SwapValue(C.ExternalSymbolsIndex);
- SwapValue(C.NumExternalSymbols);
- SwapValue(C.UndefinedSymbolsIndex);
- SwapValue(C.NumUndefinedSymbols);
- SwapValue(C.TOCOffset);
- SwapValue(C.NumTOCEntries);
- SwapValue(C.ModuleTableOffset);
- SwapValue(C.NumModuleTableEntries);
- SwapValue(C.ReferenceSymbolTableOffset);
- SwapValue(C.NumReferencedSymbolTableEntries);
- SwapValue(C.IndirectSymbolTableOffset);
- SwapValue(C.NumIndirectSymbolTableEntries);
- SwapValue(C.ExternalRelocationTableOffset);
- SwapValue(C.NumExternalRelocationTableEntries);
- SwapValue(C.LocalRelocationTableOffset);
- SwapValue(C.NumLocalRelocationTableEntries);
+void SwapStruct(MachO::dysymtab_command &C) {
+ SwapValue(C.cmd);
+ SwapValue(C.cmdsize);
+ SwapValue(C.ilocalsym);
+ SwapValue(C.nlocalsym);
+ SwapValue(C.iextdefsym);
+ SwapValue(C.nextdefsym);
+ SwapValue(C.iundefsym);
+ SwapValue(C.nundefsym);
+ SwapValue(C.tocoff);
+ SwapValue(C.ntoc);
+ SwapValue(C.modtaboff);
+ SwapValue(C.nmodtab);
+ SwapValue(C.extrefsymoff);
+ SwapValue(C.nextrefsyms);
+ SwapValue(C.indirectsymoff);
+ SwapValue(C.nindirectsyms);
+ SwapValue(C.extreloff);
+ SwapValue(C.nextrel);
+ SwapValue(C.locreloff);
+ SwapValue(C.nlocrel);
}
template<>
-void SwapStruct(macho::LinkeditDataLoadCommand &C) {
- SwapValue(C.Type);
- SwapValue(C.Size);
- SwapValue(C.DataOffset);
- SwapValue(C.DataSize);
+void SwapStruct(MachO::linkedit_data_command &C) {
+ SwapValue(C.cmd);
+ SwapValue(C.cmdsize);
+ SwapValue(C.dataoff);
+ SwapValue(C.datasize);
}
template<>
-void SwapStruct(macho::SegmentLoadCommand &C) {
- SwapValue(C.Type);
- SwapValue(C.Size);
- SwapValue(C.VMAddress);
- SwapValue(C.VMSize);
- SwapValue(C.FileOffset);
- SwapValue(C.FileSize);
- SwapValue(C.MaxVMProtection);
- SwapValue(C.InitialVMProtection);
- SwapValue(C.NumSections);
- SwapValue(C.Flags);
+void SwapStruct(MachO::segment_command &C) {
+ SwapValue(C.cmd);
+ SwapValue(C.cmdsize);
+ SwapValue(C.vmaddr);
+ SwapValue(C.vmsize);
+ SwapValue(C.fileoff);
+ SwapValue(C.filesize);
+ SwapValue(C.maxprot);
+ SwapValue(C.initprot);
+ SwapValue(C.nsects);
+ SwapValue(C.flags);
}
template<>
-void SwapStruct(macho::Segment64LoadCommand &C) {
- SwapValue(C.Type);
- SwapValue(C.Size);
- SwapValue(C.VMAddress);
- SwapValue(C.VMSize);
- SwapValue(C.FileOffset);
- SwapValue(C.FileSize);
- SwapValue(C.MaxVMProtection);
- SwapValue(C.InitialVMProtection);
- SwapValue(C.NumSections);
- SwapValue(C.Flags);
+void SwapStruct(MachO::segment_command_64 &C) {
+ SwapValue(C.cmd);
+ SwapValue(C.cmdsize);
+ SwapValue(C.vmaddr);
+ SwapValue(C.vmsize);
+ SwapValue(C.fileoff);
+ SwapValue(C.filesize);
+ SwapValue(C.maxprot);
+ SwapValue(C.initprot);
+ SwapValue(C.nsects);
+ SwapValue(C.flags);
}
template<>
-void SwapStruct(macho::IndirectSymbolTableEntry &C) {
- SwapValue(C.Index);
+void SwapStruct(uint32_t &C) {
+ SwapValue(C);
}
template<>
-void SwapStruct(macho::LinkerOptionsLoadCommand &C) {
- SwapValue(C.Type);
- SwapValue(C.Size);
- SwapValue(C.Count);
+void SwapStruct(MachO::linker_options_command &C) {
+ SwapValue(C.cmd);
+ SwapValue(C.cmdsize);
+ SwapValue(C.count);
}
template<>
-void SwapStruct(macho::DataInCodeTableEntry &C) {
- SwapValue(C.Offset);
- SwapValue(C.Length);
- SwapValue(C.Kind);
+void SwapStruct(MachO::data_in_code_entry &C) {
+ SwapValue(C.offset);
+ SwapValue(C.length);
+ SwapValue(C.kind);
}
template<typename T>
@@ -226,11 +233,11 @@ static uint32_t
getSegmentLoadCommandNumSections(const MachOObjectFile *O,
const MachOObjectFile::LoadCommandInfo &L) {
if (O->is64Bit()) {
- macho::Segment64LoadCommand S = O->getSegment64LoadCommand(L);
- return S.NumSections;
+ MachO::segment_command_64 S = O->getSegment64LoadCommand(L);
+ return S.nsects;
}
- macho::SegmentLoadCommand S = O->getSegmentLoadCommand(L);
- return S.NumSections;
+ MachO::segment_command S = O->getSegmentLoadCommand(L);
+ return S.nsects;
}
static const char *
@@ -239,10 +246,10 @@ getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L,
uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr);
bool Is64 = O->is64Bit();
- unsigned SegmentLoadSize = Is64 ? sizeof(macho::Segment64LoadCommand) :
- sizeof(macho::SegmentLoadCommand);
- unsigned SectionSize = Is64 ? sizeof(macho::Section64) :
- sizeof(macho::Section);
+ unsigned SegmentLoadSize = Is64 ? sizeof(MachO::segment_command_64) :
+ sizeof(MachO::segment_command);
+ unsigned SectionSize = Is64 ? sizeof(MachO::section_64) :
+ sizeof(MachO::section);
uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize;
return reinterpret_cast<const char*>(SectionAddr);
@@ -252,10 +259,10 @@ static const char *getPtr(const MachOObjectFile *O, size_t Offset) {
return O->getData().substr(Offset, 1).data();
}
-static SymbolTableEntryBase
+static nlist_base
getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) {
const char *P = reinterpret_cast<const char *>(DRI.p);
- return getStruct<SymbolTableEntryBase>(O, P);
+ return getStruct<nlist_base>(O, P);
}
static StringRef parseSegmentOrSectionName(const char *P) {
@@ -283,11 +290,11 @@ static void advanceTo(T &it, size_t Val) {
}
static unsigned getCPUType(const MachOObjectFile *O) {
- return O->getHeader().CPUType;
+ return O->getHeader().cputype;
}
static void printRelocationTargetName(const MachOObjectFile *O,
- const macho::RelocationEntry &RE,
+ const MachO::any_relocation_info &RE,
raw_string_ostream &fmt) {
bool IsScattered = O->isRelocationScattered(RE);
@@ -355,59 +362,61 @@ static void printRelocationTargetName(const MachOObjectFile *O,
fmt << S;
}
-static uint32_t getPlainRelocationAddress(const macho::RelocationEntry &RE) {
- return RE.Word0;
+static uint32_t
+getPlainRelocationAddress(const MachO::any_relocation_info &RE) {
+ return RE.r_word0;
}
static unsigned
-getScatteredRelocationAddress(const macho::RelocationEntry &RE) {
- return RE.Word0 & 0xffffff;
+getScatteredRelocationAddress(const MachO::any_relocation_info &RE) {
+ return RE.r_word0 & 0xffffff;
}
static bool getPlainRelocationPCRel(const MachOObjectFile *O,
- const macho::RelocationEntry &RE) {
+ const MachO::any_relocation_info &RE) {
if (O->isLittleEndian())
- return (RE.Word1 >> 24) & 1;
- return (RE.Word1 >> 7) & 1;
+ return (RE.r_word1 >> 24) & 1;
+ return (RE.r_word1 >> 7) & 1;
}
static bool
getScatteredRelocationPCRel(const MachOObjectFile *O,
- const macho::RelocationEntry &RE) {
- return (RE.Word0 >> 30) & 1;
+ const MachO::any_relocation_info &RE) {
+ return (RE.r_word0 >> 30) & 1;
}
static unsigned getPlainRelocationLength(const MachOObjectFile *O,
- const macho::RelocationEntry &RE) {
+ const MachO::any_relocation_info &RE) {
if (O->isLittleEndian())
- return (RE.Word1 >> 25) & 3;
- return (RE.Word1 >> 5) & 3;
+ return (RE.r_word1 >> 25) & 3;
+ return (RE.r_word1 >> 5) & 3;
}
static unsigned
-getScatteredRelocationLength(const macho::RelocationEntry &RE) {
- return (RE.Word0 >> 28) & 3;
+getScatteredRelocationLength(const MachO::any_relocation_info &RE) {
+ return (RE.r_word0 >> 28) & 3;
}
static unsigned getPlainRelocationType(const MachOObjectFile *O,
- const macho::RelocationEntry &RE) {
+ const MachO::any_relocation_info &RE) {
if (O->isLittleEndian())
- return RE.Word1 >> 28;
- return RE.Word1 & 0xf;
+ return RE.r_word1 >> 28;
+ return RE.r_word1 & 0xf;
}
-static unsigned getScatteredRelocationType(const macho::RelocationEntry &RE) {
- return (RE.Word0 >> 24) & 0xf;
+static unsigned
+getScatteredRelocationType(const MachO::any_relocation_info &RE) {
+ return (RE.r_word0 >> 24) & 0xf;
}
static uint32_t getSectionFlags(const MachOObjectFile *O,
DataRefImpl Sec) {
if (O->is64Bit()) {
- macho::Section64 Sect = O->getSection64(Sec);
- return Sect.Flags;
+ MachO::section_64 Sect = O->getSection64(Sec);
+ return Sect.flags;
}
- macho::Section Sect = O->getSection(Sec);
- return Sect.Flags;
+ MachO::section Sect = O->getSection(Sec);
+ return Sect.flags;
}
MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
@@ -415,22 +424,22 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
error_code &ec)
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
- uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
- macho::LoadCommandType SegmentLoadType = is64Bit() ?
- macho::LCT_Segment64 : macho::LCT_Segment;
+ uint32_t LoadCommandCount = this->getHeader().ncmds;
+ MachO::LoadCommandType SegmentLoadType = is64Bit() ?
+ MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT;
MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo();
for (unsigned I = 0; ; ++I) {
- if (Load.C.Type == macho::LCT_Symtab) {
+ if (Load.C.cmd == MachO::LC_SYMTAB) {
assert(!SymtabLoadCmd && "Multiple symbol tables");
SymtabLoadCmd = Load.Ptr;
- } else if (Load.C.Type == macho::LCT_Dysymtab) {
+ } else if (Load.C.cmd == MachO::LC_DYSYMTAB) {
assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
DysymtabLoadCmd = Load.Ptr;
- } else if (Load.C.Type == macho::LCT_DataInCode) {
+ } else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {
assert(!DataInCodeLoadCmd && "Multiple data in code tables");
DataInCodeLoadCmd = Load.Ptr;
- } else if (Load.C.Type == SegmentLoadType) {
+ } else if (Load.C.cmd == SegmentLoadType) {
uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
for (unsigned J = 0; J < NumSections; ++J) {
const char *Sec = getSectionPtr(this, Load, J);
@@ -448,8 +457,8 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb,
SymbolRef &Res) const {
unsigned SymbolTableEntrySize = is64Bit() ?
- sizeof(macho::Symbol64TableEntry) :
- sizeof(macho::SymbolTableEntry);
+ sizeof(MachO::nlist_64) :
+ sizeof(MachO::nlist);
Symb.p += SymbolTableEntrySize;
Res = SymbolRef(Symb, this);
return object_error::success;
@@ -458,8 +467,8 @@ error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb,
error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
StringRef &Res) const {
StringRef StringTable = getStringTableData();
- SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
- const char *Start = &StringTable.data()[Entry.StringIndex];
+ nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ const char *Start = &StringTable.data()[Entry.n_strx];
Res = StringRef(Start);
return object_error::success;
}
@@ -467,11 +476,11 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
uint64_t &Res) const {
if (is64Bit()) {
- macho::Symbol64TableEntry Entry = getSymbol64TableEntry(Symb);
- Res = Entry.Value;
+ MachO::nlist_64 Entry = getSymbol64TableEntry(Symb);
+ Res = Entry.n_value;
} else {
- macho::SymbolTableEntry Entry = getSymbolTableEntry(Symb);
- Res = Entry.Value;
+ MachO::nlist Entry = getSymbolTableEntry(Symb);
+ Res = Entry.n_value;
}
return object_error::success;
}
@@ -479,18 +488,18 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
error_code
MachOObjectFile::getSymbolFileOffset(DataRefImpl Symb,
uint64_t &Res) const {
- SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+ nlist_base Entry = getSymbolTableEntryBase(this, Symb);
getSymbolAddress(Symb, Res);
- if (Entry.SectionIndex) {
+ if (Entry.n_sect) {
uint64_t Delta;
DataRefImpl SecRel;
- SecRel.d.a = Entry.SectionIndex-1;
+ SecRel.d.a = Entry.n_sect-1;
if (is64Bit()) {
- macho::Section64 Sec = getSection64(SecRel);
- Delta = Sec.Offset - Sec.Address;
+ MachO::section_64 Sec = getSection64(SecRel);
+ Delta = Sec.offset - Sec.addr;
} else {
- macho::Section Sec = getSection(SecRel);
- Delta = Sec.Offset - Sec.Address;
+ MachO::section Sec = getSection(SecRel);
+ Delta = Sec.offset - Sec.addr;
}
Res += Delta;
@@ -504,8 +513,8 @@ error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
uint32_t flags;
this->getSymbolFlags(DRI, flags);
if (flags & SymbolRef::SF_Common) {
- SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
- Result = 1 << MachO::GET_COMM_ALIGN(Entry.Flags);
+ nlist_base Entry = getSymbolTableEntryBase(this, DRI);
+ Result = 1 << MachO::GET_COMM_ALIGN(Entry.n_desc);
} else {
Result = 0;
}
@@ -518,13 +527,13 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
uint64_t EndOffset = 0;
uint8_t SectionIndex;
- SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+ nlist_base Entry = getSymbolTableEntryBase(this, DRI);
uint64_t Value;
getSymbolAddress(DRI, Value);
BeginOffset = Value;
- SectionIndex = Entry.SectionIndex;
+ SectionIndex = Entry.n_sect;
if (!SectionIndex) {
uint32_t flags = SymbolRef::SF_None;
this->getSymbolFlags(DRI, flags);
@@ -542,7 +551,7 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
DataRefImpl DRI = I->getRawDataRefImpl();
Entry = getSymbolTableEntryBase(this, DRI);
getSymbolAddress(DRI, Value);
- if (Entry.SectionIndex == SectionIndex && Value > BeginOffset)
+ if (Entry.n_sect == SectionIndex && Value > BeginOffset)
if (!EndOffset || Value < EndOffset)
EndOffset = Value;
}
@@ -560,73 +569,47 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
SymbolRef::Type &Res) const {
- SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
- uint8_t n_type = Entry.Type;
+ nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ uint8_t n_type = Entry.n_type;
Res = SymbolRef::ST_Other;
// If this is a STAB debugging symbol, we can do nothing more.
- if (n_type & MachO::NlistMaskStab) {
+ if (n_type & MachO::N_STAB) {
Res = SymbolRef::ST_Debug;
return object_error::success;
}
- switch (n_type & MachO::NlistMaskType) {
- case MachO::NListTypeUndefined :
+ switch (n_type & MachO::N_TYPE) {
+ case MachO::N_UNDF :
Res = SymbolRef::ST_Unknown;
break;
- case MachO::NListTypeSection :
+ case MachO::N_SECT :
Res = SymbolRef::ST_Function;
break;
}
return object_error::success;
}
-error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
- char &Res) const {
- SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
- uint8_t Type = Entry.Type;
- uint16_t Flags = Entry.Flags;
-
- char Char;
- switch (Type & macho::STF_TypeMask) {
- case macho::STT_Undefined:
- Char = 'u';
- break;
- case macho::STT_Absolute:
- case macho::STT_Section:
- Char = 's';
- break;
- default:
- Char = '?';
- break;
- }
-
- if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
- Char = toupper(static_cast<unsigned char>(Char));
- Res = Char;
- return object_error::success;
-}
-
error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
uint32_t &Result) const {
- SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+ nlist_base Entry = getSymbolTableEntryBase(this, DRI);
- uint8_t MachOType = Entry.Type;
- uint16_t MachOFlags = Entry.Flags;
+ uint8_t MachOType = Entry.n_type;
+ uint16_t MachOFlags = Entry.n_desc;
// TODO: Correctly set SF_ThreadLocal
Result = SymbolRef::SF_None;
- if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
+ if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF)
Result |= SymbolRef::SF_Undefined;
- if (MachOFlags & macho::STF_StabsEntryMask)
+ if (MachOType & MachO::N_STAB)
Result |= SymbolRef::SF_FormatSpecific;
- if (MachOType & MachO::NlistMaskExternal) {
+ if (MachOType & MachO::N_EXT) {
Result |= SymbolRef::SF_Global;
- if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) {
+ if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) {
uint64_t Value;
getSymbolAddress(DRI, Value);
if (Value)
@@ -634,10 +617,10 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
}
}
- if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef))
+ if (MachOFlags & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF))
Result |= SymbolRef::SF_Weak;
- if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeAbsolute)
+ if ((MachOType & MachO::N_TYPE) == MachO::N_ABS)
Result |= SymbolRef::SF_Absolute;
return object_error::success;
@@ -646,8 +629,8 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
error_code
MachOObjectFile::getSymbolSection(DataRefImpl Symb,
section_iterator &Res) const {
- SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
- uint8_t index = Entry.SectionIndex;
+ nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ uint8_t index = Entry.n_sect;
if (index == 0) {
Res = end_sections();
@@ -682,11 +665,11 @@ MachOObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const {
error_code
MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const {
if (is64Bit()) {
- macho::Section64 Sect = getSection64(Sec);
- Res = Sect.Address;
+ MachO::section_64 Sect = getSection64(Sec);
+ Res = Sect.addr;
} else {
- macho::Section Sect = getSection(Sec);
- Res = Sect.Address;
+ MachO::section Sect = getSection(Sec);
+ Res = Sect.addr;
}
return object_error::success;
}
@@ -694,11 +677,11 @@ MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const {
error_code
MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const {
if (is64Bit()) {
- macho::Section64 Sect = getSection64(Sec);
- Res = Sect.Size;
+ MachO::section_64 Sect = getSection64(Sec);
+ Res = Sect.size;
} else {
- macho::Section Sect = getSection(Sec);
- Res = Sect.Size;
+ MachO::section Sect = getSection(Sec);
+ Res = Sect.size;
}
return object_error::success;
@@ -710,13 +693,13 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const {
uint64_t Size;
if (is64Bit()) {
- macho::Section64 Sect = getSection64(Sec);
- Offset = Sect.Offset;
- Size = Sect.Size;
+ MachO::section_64 Sect = getSection64(Sec);
+ Offset = Sect.offset;
+ Size = Sect.size;
} else {
- macho::Section Sect =getSection(Sec);
- Offset = Sect.Offset;
- Size = Sect.Size;
+ MachO::section Sect = getSection(Sec);
+ Offset = Sect.offset;
+ Size = Sect.size;
}
Res = this->getData().substr(Offset, Size);
@@ -727,11 +710,11 @@ error_code
MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const {
uint32_t Align;
if (is64Bit()) {
- macho::Section64 Sect = getSection64(Sec);
- Align = Sect.Align;
+ MachO::section_64 Sect = getSection64(Sec);
+ Align = Sect.align;
} else {
- macho::Section Sect = getSection(Sec);
- Align = Sect.Align;
+ MachO::section Sect = getSection(Sec);
+ Align = Sect.align;
}
Res = uint64_t(1) << Align;
@@ -741,7 +724,7 @@ MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const {
error_code
MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const {
uint32_t Flags = getSectionFlags(this, Sec);
- Res = Flags & macho::SF_PureInstructions;
+ Res = Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
return object_error::success;
}
@@ -775,9 +758,9 @@ error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
error_code
MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const {
uint32_t Flags = getSectionFlags(this, Sec);
- unsigned SectionType = Flags & MachO::SectionFlagMaskSectionType;
- Res = SectionType == MachO::SectionTypeZeroFill ||
- SectionType == MachO::SectionTypeZeroFillLarge;
+ unsigned SectionType = Flags & MachO::SECTION_TYPE;
+ Res = SectionType == MachO::S_ZEROFILL ||
+ SectionType == MachO::S_GB_ZEROFILL;
return object_error::success;
}
@@ -814,14 +797,14 @@ MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
return object_error::success;
}
-relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
+relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const {
uint32_t Offset;
if (is64Bit()) {
- macho::Section64 Sect = getSection64(Sec);
- Offset = Sect.RelocationTableOffset;
+ MachO::section_64 Sect = getSection64(Sec);
+ Offset = Sect.reloff;
} else {
- macho::Section Sect = getSection(Sec);
- Offset = Sect.RelocationTableOffset;
+ MachO::section Sect = getSection(Sec);
+ Offset = Sect.reloff;
}
DataRefImpl Ret;
@@ -830,21 +813,21 @@ relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
}
relocation_iterator
-MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+MachOObjectFile::section_rel_end(DataRefImpl Sec) const {
uint32_t Offset;
uint32_t Num;
if (is64Bit()) {
- macho::Section64 Sect = getSection64(Sec);
- Offset = Sect.RelocationTableOffset;
- Num = Sect.NumRelocationTableEntries;
+ MachO::section_64 Sect = getSection64(Sec);
+ Offset = Sect.reloff;
+ Num = Sect.nreloc;
} else {
- macho::Section Sect = getSection(Sec);
- Offset = Sect.RelocationTableOffset;
- Num = Sect.NumRelocationTableEntries;
+ MachO::section Sect = getSection(Sec);
+ Offset = Sect.reloff;
+ Num = Sect.nreloc;
}
- const macho::RelocationEntry *P =
- reinterpret_cast<const macho::RelocationEntry*>(getPtr(this, Offset));
+ const MachO::any_relocation_info *P =
+ reinterpret_cast<const MachO::any_relocation_info *>(getPtr(this, Offset));
DataRefImpl Ret;
Ret.p = reinterpret_cast<uintptr_t>(P + Num);
@@ -853,8 +836,8 @@ MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel,
RelocationRef &Res) const {
- const macho::RelocationEntry *P =
- reinterpret_cast<const macho::RelocationEntry *>(Rel.p);
+ const MachO::any_relocation_info *P =
+ reinterpret_cast<const MachO::any_relocation_info *>(Rel.p);
Rel.p = reinterpret_cast<uintptr_t>(P + 1);
Res = RelocationRef(Rel, this);
return object_error::success;
@@ -867,24 +850,24 @@ MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const {
error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const {
- macho::RelocationEntry RE = getRelocation(Rel);
+ MachO::any_relocation_info RE = getRelocation(Rel);
Res = getAnyRelocationAddress(RE);
return object_error::success;
}
symbol_iterator
MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
- macho::RelocationEntry RE = getRelocation(Rel);
+ MachO::any_relocation_info RE = getRelocation(Rel);
uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE);
bool isExtern = getPlainRelocationExternal(RE);
if (!isExtern)
return end_symbols();
- macho::SymtabLoadCommand S = getSymtabLoadCommand();
+ MachO::symtab_command S = getSymtabLoadCommand();
unsigned SymbolTableEntrySize = is64Bit() ?
- sizeof(macho::Symbol64TableEntry) :
- sizeof(macho::SymbolTableEntry);
- uint64_t Offset = S.SymbolTableOffset + SymbolIdx * SymbolTableEntrySize;
+ sizeof(MachO::nlist_64) :
+ sizeof(MachO::nlist);
+ uint64_t Offset = S.symoff + SymbolIdx * SymbolTableEntrySize;
DataRefImpl Sym;
Sym.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
return symbol_iterator(SymbolRef(Sym, this));
@@ -892,7 +875,7 @@ MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
uint64_t &Res) const {
- macho::RelocationEntry RE = getRelocation(Rel);
+ MachO::any_relocation_info RE = getRelocation(Rel);
Res = getAnyRelocationType(RE);
return object_error::success;
}
@@ -993,7 +976,7 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
error_code
MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
- macho::RelocationEntry RE = getRelocation(Rel);
+ MachO::any_relocation_info RE = getRelocation(Rel);
unsigned Arch = this->getArch();
@@ -1010,47 +993,47 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
bool isPCRel = getAnyRelocationPCRel(RE);
switch (Type) {
- case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD
- case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT
+ case MachO::X86_64_RELOC_GOT_LOAD:
+ case MachO::X86_64_RELOC_GOT: {
printRelocationTargetName(this, RE, fmt);
fmt << "@GOT";
if (isPCRel) fmt << "PCREL";
break;
}
- case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR
+ case MachO::X86_64_RELOC_SUBTRACTOR: {
DataRefImpl RelNext = Rel;
RelNext.d.a++;
- macho::RelocationEntry RENext = getRelocation(RelNext);
+ MachO::any_relocation_info RENext = getRelocation(RelNext);
- // X86_64_SUBTRACTOR must be followed by a relocation of type
+ // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
// X86_64_RELOC_UNSIGNED.
// NOTE: Scattered relocations don't exist on x86_64.
unsigned RType = getAnyRelocationType(RENext);
- if (RType != 0)
+ if (RType != MachO::X86_64_RELOC_UNSIGNED)
report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
"X86_64_RELOC_SUBTRACTOR.");
- // The X86_64_RELOC_UNSIGNED contains the minuend symbol,
- // X86_64_SUBTRACTOR contains to the subtrahend.
+ // The X86_64_RELOC_UNSIGNED contains the minuend symbol;
+ // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
printRelocationTargetName(this, RENext, fmt);
fmt << "-";
printRelocationTargetName(this, RE, fmt);
break;
}
- case macho::RIT_X86_64_TLV:
+ case MachO::X86_64_RELOC_TLV:
printRelocationTargetName(this, RE, fmt);
fmt << "@TLV";
if (isPCRel) fmt << "P";
break;
- case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1
+ case MachO::X86_64_RELOC_SIGNED_1:
printRelocationTargetName(this, RE, fmt);
fmt << "-1";
break;
- case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2
+ case MachO::X86_64_RELOC_SIGNED_2:
printRelocationTargetName(this, RE, fmt);
fmt << "-2";
break;
- case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4
+ case MachO::X86_64_RELOC_SIGNED_4:
printRelocationTargetName(this, RE, fmt);
fmt << "-4";
break;
@@ -1059,21 +1042,22 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
break;
}
// X86 and ARM share some relocation types in common.
- } else if (Arch == Triple::x86 || Arch == Triple::arm) {
+ } else if (Arch == Triple::x86 || Arch == Triple::arm ||
+ Arch == Triple::ppc) {
// Generic relocation types...
switch (Type) {
- case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info
+ case MachO::GENERIC_RELOC_PAIR: // prints no info
return object_error::success;
- case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF
+ case MachO::GENERIC_RELOC_SECTDIFF: {
DataRefImpl RelNext = Rel;
RelNext.d.a++;
- macho::RelocationEntry RENext = getRelocation(RelNext);
+ MachO::any_relocation_info RENext = getRelocation(RelNext);
// X86 sect diff's must be followed by a relocation of type
// GENERIC_RELOC_PAIR.
unsigned RType = getAnyRelocationType(RENext);
- if (RType != 1)
+ if (RType != MachO::GENERIC_RELOC_PAIR)
report_fatal_error("Expected GENERIC_RELOC_PAIR after "
"GENERIC_RELOC_SECTDIFF.");
@@ -1084,19 +1068,17 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
}
}
- if (Arch == Triple::x86) {
- // All X86 relocations that need special printing were already
- // handled in the generic code.
+ if (Arch == Triple::x86 || Arch == Triple::ppc) {
switch (Type) {
- case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF
+ case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
DataRefImpl RelNext = Rel;
RelNext.d.a++;
- macho::RelocationEntry RENext = getRelocation(RelNext);
+ MachO::any_relocation_info RENext = getRelocation(RelNext);
// X86 sect diff's must be followed by a relocation of type
// GENERIC_RELOC_PAIR.
unsigned RType = getAnyRelocationType(RENext);
- if (RType != 1)
+ if (RType != MachO::GENERIC_RELOC_PAIR)
report_fatal_error("Expected GENERIC_RELOC_PAIR after "
"GENERIC_RELOC_LOCAL_SECTDIFF.");
@@ -1105,7 +1087,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
printRelocationTargetName(this, RENext, fmt);
break;
}
- case macho::RIT_Generic_TLV: {
+ case MachO::GENERIC_RELOC_TLV: {
printRelocationTargetName(this, RE, fmt);
fmt << "@TLV";
if (IsPCRel) fmt << "P";
@@ -1116,8 +1098,8 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
}
} else { // ARM-specific relocations
switch (Type) {
- case macho::RIT_ARM_Half: // ARM_RELOC_HALF
- case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF
+ case MachO::ARM_RELOC_HALF:
+ case MachO::ARM_RELOC_HALF_SECTDIFF: {
// Half relocations steal a bit from the length field to encode
// whether this is an upper16 or a lower16 relocation.
bool isUpper = getAnyRelocationLength(RE) >> 1;
@@ -1130,14 +1112,14 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
DataRefImpl RelNext = Rel;
RelNext.d.a++;
- macho::RelocationEntry RENext = getRelocation(RelNext);
+ MachO::any_relocation_info RENext = getRelocation(RelNext);
// ARM half relocs must be followed by a relocation of type
// ARM_RELOC_PAIR.
unsigned RType = getAnyRelocationType(RENext);
- if (RType != 1)
+ if (RType != MachO::ARM_RELOC_PAIR)
report_fatal_error("Expected ARM_RELOC_PAIR after "
- "GENERIC_RELOC_HALF");
+ "ARM_RELOC_HALF");
// NOTE: The half of the target virtual address is stashed in the
// address field of the secondary relocation, but we can't reverse
@@ -1146,7 +1128,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
// ARM_RELOC_HALF_SECTDIFF encodes the second section in the
// symbol/section pointer of the follow-on relocation.
- if (Type == macho::RIT_ARM_HalfDifference) {
+ if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
fmt << "-";
printRelocationTargetName(this, RENext, fmt);
}
@@ -1177,17 +1159,17 @@ MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const {
// On arches that use the generic relocations, GENERIC_RELOC_PAIR
// is always hidden.
- if (Arch == Triple::x86 || Arch == Triple::arm) {
- if (Type == macho::RIT_Pair) Result = true;
+ if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc) {
+ if (Type == MachO::GENERIC_RELOC_PAIR) Result = true;
} else if (Arch == Triple::x86_64) {
// On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
// an X86_64_RELOC_SUBTRACTOR.
- if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) {
+ if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) {
DataRefImpl RelPrev = Rel;
RelPrev.d.a--;
uint64_t PrevType;
getRelocationType(RelPrev, PrevType);
- if (PrevType == macho::RIT_X86_64_Subtractor)
+ if (PrevType == MachO::X86_64_RELOC_SUBTRACTOR)
Result = true;
}
}
@@ -1210,8 +1192,8 @@ symbol_iterator MachOObjectFile::begin_symbols() const {
if (!SymtabLoadCmd)
return symbol_iterator(SymbolRef(DRI, this));
- macho::SymtabLoadCommand Symtab = getSymtabLoadCommand();
- DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.SymbolTableOffset));
+ MachO::symtab_command Symtab = getSymtabLoadCommand();
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff));
return symbol_iterator(SymbolRef(DRI, this));
}
@@ -1220,12 +1202,12 @@ symbol_iterator MachOObjectFile::end_symbols() const {
if (!SymtabLoadCmd)
return symbol_iterator(SymbolRef(DRI, this));
- macho::SymtabLoadCommand Symtab = getSymtabLoadCommand();
+ MachO::symtab_command Symtab = getSymtabLoadCommand();
unsigned SymbolTableEntrySize = is64Bit() ?
- sizeof(macho::Symbol64TableEntry) :
- sizeof(macho::SymbolTableEntry);
- unsigned Offset = Symtab.SymbolTableOffset +
- Symtab.NumSymbolTableEntries * SymbolTableEntrySize;
+ sizeof(MachO::nlist_64) :
+ sizeof(MachO::nlist);
+ unsigned Offset = Symtab.symoff +
+ Symtab.nsyms * SymbolTableEntrySize;
DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
return symbol_iterator(SymbolRef(DRI, this));
}
@@ -1269,28 +1251,28 @@ StringRef MachOObjectFile::getFileFormatName() const {
unsigned CPUType = getCPUType(this);
if (!is64Bit()) {
switch (CPUType) {
- case llvm::MachO::CPUTypeI386:
+ case llvm::MachO::CPU_TYPE_I386:
return "Mach-O 32-bit i386";
- case llvm::MachO::CPUTypeARM:
+ case llvm::MachO::CPU_TYPE_ARM:
return "Mach-O arm";
- case llvm::MachO::CPUTypePowerPC:
+ case llvm::MachO::CPU_TYPE_POWERPC:
return "Mach-O 32-bit ppc";
default:
- assert((CPUType & llvm::MachO::CPUArchABI64) == 0 &&
+ assert((CPUType & llvm::MachO::CPU_ARCH_ABI64) == 0 &&
"64-bit object file when we're not 64-bit?");
return "Mach-O 32-bit unknown";
}
}
// Make sure the cpu type has the correct mask.
- assert((CPUType & llvm::MachO::CPUArchABI64)
- == llvm::MachO::CPUArchABI64 &&
+ assert((CPUType & llvm::MachO::CPU_ARCH_ABI64)
+ == llvm::MachO::CPU_ARCH_ABI64 &&
"32-bit object file when we're 64-bit?");
switch (CPUType) {
- case llvm::MachO::CPUTypeX86_64:
+ case llvm::MachO::CPU_TYPE_X86_64:
return "Mach-O 64-bit x86-64";
- case llvm::MachO::CPUTypePowerPC64:
+ case llvm::MachO::CPU_TYPE_POWERPC64:
return "Mach-O 64-bit ppc64";
default:
return "Mach-O 64-bit unknown";
@@ -1299,15 +1281,15 @@ StringRef MachOObjectFile::getFileFormatName() const {
Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
switch (CPUType) {
- case llvm::MachO::CPUTypeI386:
+ case llvm::MachO::CPU_TYPE_I386:
return Triple::x86;
- case llvm::MachO::CPUTypeX86_64:
+ case llvm::MachO::CPU_TYPE_X86_64:
return Triple::x86_64;
- case llvm::MachO::CPUTypeARM:
+ case llvm::MachO::CPU_TYPE_ARM:
return Triple::arm;
- case llvm::MachO::CPUTypePowerPC:
+ case llvm::MachO::CPU_TYPE_POWERPC:
return Triple::ppc;
- case llvm::MachO::CPUTypePowerPC64:
+ case llvm::MachO::CPU_TYPE_POWERPC64:
return Triple::ppc64;
default:
return Triple::UnknownArch;
@@ -1323,16 +1305,16 @@ StringRef MachOObjectFile::getLoadName() const {
report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
}
-relocation_iterator MachOObjectFile::getSectionRelBegin(unsigned Index) const {
+relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const {
DataRefImpl DRI;
DRI.d.a = Index;
- return getSectionRelBegin(DRI);
+ return section_rel_begin(DRI);
}
-relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const {
+relocation_iterator MachOObjectFile::section_rel_end(unsigned Index) const {
DataRefImpl DRI;
DRI.d.a = Index;
- return getSectionRelEnd(DRI);
+ return section_rel_end(DRI);
}
dice_iterator MachOObjectFile::begin_dices() const {
@@ -1340,8 +1322,8 @@ dice_iterator MachOObjectFile::begin_dices() const {
if (!DataInCodeLoadCmd)
return dice_iterator(DiceRef(DRI, this));
- macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
- DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.DataOffset));
+ MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand();
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.dataoff));
return dice_iterator(DiceRef(DRI, this));
}
@@ -1350,8 +1332,8 @@ dice_iterator MachOObjectFile::end_dices() const {
if (!DataInCodeLoadCmd)
return dice_iterator(DiceRef(DRI, this));
- macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
- unsigned Offset = DicLC.DataOffset + DicLC.DataSize;
+ MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand();
+ unsigned Offset = DicLC.dataoff + DicLC.datasize;
DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
return dice_iterator(DiceRef(DRI, this));
}
@@ -1364,80 +1346,82 @@ MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
ArrayRef<char>
MachOObjectFile::getSectionRawName(DataRefImpl Sec) const {
- const SectionBase *Base =
- reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]);
- return ArrayRef<char>(Base->Name);
+ const section_base *Base =
+ reinterpret_cast<const section_base *>(Sections[Sec.d.a]);
+ return ArrayRef<char>(Base->sectname);
}
ArrayRef<char>
MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const {
- const SectionBase *Base =
- reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]);
- return ArrayRef<char>(Base->SegmentName);
+ const section_base *Base =
+ reinterpret_cast<const section_base *>(Sections[Sec.d.a]);
+ return ArrayRef<char>(Base->segname);
}
bool
-MachOObjectFile::isRelocationScattered(const macho::RelocationEntry &RE)
+MachOObjectFile::isRelocationScattered(const MachO::any_relocation_info &RE)
const {
- if (getCPUType(this) == llvm::MachO::CPUTypeX86_64)
+ if (getCPUType(this) == MachO::CPU_TYPE_X86_64)
return false;
- return getPlainRelocationAddress(RE) & macho::RF_Scattered;
+ return getPlainRelocationAddress(RE) & MachO::R_SCATTERED;
}
unsigned MachOObjectFile::getPlainRelocationSymbolNum(
- const macho::RelocationEntry &RE) const {
+ const MachO::any_relocation_info &RE) const {
if (isLittleEndian())
- return RE.Word1 & 0xffffff;
- return RE.Word1 >> 8;
+ return RE.r_word1 & 0xffffff;
+ return RE.r_word1 >> 8;
}
bool MachOObjectFile::getPlainRelocationExternal(
- const macho::RelocationEntry &RE) const {
+ const MachO::any_relocation_info &RE) const {
if (isLittleEndian())
- return (RE.Word1 >> 27) & 1;
- return (RE.Word1 >> 4) & 1;
+ return (RE.r_word1 >> 27) & 1;
+ return (RE.r_word1 >> 4) & 1;
}
bool MachOObjectFile::getScatteredRelocationScattered(
- const macho::RelocationEntry &RE) const {
- return RE.Word0 >> 31;
+ const MachO::any_relocation_info &RE) const {
+ return RE.r_word0 >> 31;
}
uint32_t MachOObjectFile::getScatteredRelocationValue(
- const macho::RelocationEntry &RE) const {
- return RE.Word1;
+ const MachO::any_relocation_info &RE) const {
+ return RE.r_word1;
}
unsigned MachOObjectFile::getAnyRelocationAddress(
- const macho::RelocationEntry &RE) const {
+ const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationAddress(RE);
return getPlainRelocationAddress(RE);
}
-unsigned
-MachOObjectFile::getAnyRelocationPCRel(const macho::RelocationEntry &RE) const {
+unsigned MachOObjectFile::getAnyRelocationPCRel(
+ const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationPCRel(this, RE);
return getPlainRelocationPCRel(this, RE);
}
unsigned MachOObjectFile::getAnyRelocationLength(
- const macho::RelocationEntry &RE) const {
+ const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationLength(RE);
return getPlainRelocationLength(this, RE);
}
unsigned
-MachOObjectFile::getAnyRelocationType(const macho::RelocationEntry &RE) const {
+MachOObjectFile::getAnyRelocationType(
+ const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationType(RE);
return getPlainRelocationType(this, RE);
}
SectionRef
-MachOObjectFile::getRelocationSection(const macho::RelocationEntry &RE) const {
+MachOObjectFile::getRelocationSection(
+ const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE) || getPlainRelocationExternal(RE))
return *end_sections();
unsigned SecNum = getPlainRelocationSymbolNum(RE) - 1;
@@ -1450,133 +1434,132 @@ MachOObjectFile::LoadCommandInfo
MachOObjectFile::getFirstLoadCommandInfo() const {
MachOObjectFile::LoadCommandInfo Load;
- unsigned HeaderSize = is64Bit() ? macho::Header64Size : macho::Header32Size;
+ unsigned HeaderSize = is64Bit() ? sizeof(MachO::mach_header_64) :
+ sizeof(MachO::mach_header);
Load.Ptr = getPtr(this, HeaderSize);
- Load.C = getStruct<macho::LoadCommand>(this, Load.Ptr);
+ Load.C = getStruct<MachO::load_command>(this, Load.Ptr);
return Load;
}
MachOObjectFile::LoadCommandInfo
MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const {
MachOObjectFile::LoadCommandInfo Next;
- Next.Ptr = L.Ptr + L.C.Size;
- Next.C = getStruct<macho::LoadCommand>(this, Next.Ptr);
+ Next.Ptr = L.Ptr + L.C.cmdsize;
+ Next.C = getStruct<MachO::load_command>(this, Next.Ptr);
return Next;
}
-macho::Section MachOObjectFile::getSection(DataRefImpl DRI) const {
- return getStruct<macho::Section>(this, Sections[DRI.d.a]);
+MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const {
+ return getStruct<MachO::section>(this, Sections[DRI.d.a]);
}
-macho::Section64 MachOObjectFile::getSection64(DataRefImpl DRI) const {
- return getStruct<macho::Section64>(this, Sections[DRI.d.a]);
+MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const {
+ return getStruct<MachO::section_64>(this, Sections[DRI.d.a]);
}
-macho::Section MachOObjectFile::getSection(const LoadCommandInfo &L,
+MachO::section MachOObjectFile::getSection(const LoadCommandInfo &L,
unsigned Index) const {
const char *Sec = getSectionPtr(this, L, Index);
- return getStruct<macho::Section>(this, Sec);
+ return getStruct<MachO::section>(this, Sec);
}
-macho::Section64 MachOObjectFile::getSection64(const LoadCommandInfo &L,
- unsigned Index) const {
+MachO::section_64 MachOObjectFile::getSection64(const LoadCommandInfo &L,
+ unsigned Index) const {
const char *Sec = getSectionPtr(this, L, Index);
- return getStruct<macho::Section64>(this, Sec);
+ return getStruct<MachO::section_64>(this, Sec);
}
-macho::SymbolTableEntry
+MachO::nlist
MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const {
const char *P = reinterpret_cast<const char *>(DRI.p);
- return getStruct<macho::SymbolTableEntry>(this, P);
+ return getStruct<MachO::nlist>(this, P);
}
-macho::Symbol64TableEntry
+MachO::nlist_64
MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const {
const char *P = reinterpret_cast<const char *>(DRI.p);
- return getStruct<macho::Symbol64TableEntry>(this, P);
+ return getStruct<MachO::nlist_64>(this, P);
}
-macho::LinkeditDataLoadCommand MachOObjectFile::getLinkeditDataLoadCommand(
- const MachOObjectFile::LoadCommandInfo &L) const {
- return getStruct<macho::LinkeditDataLoadCommand>(this, L.Ptr);
+MachO::linkedit_data_command
+MachOObjectFile::getLinkeditDataLoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::linkedit_data_command>(this, L.Ptr);
}
-macho::SegmentLoadCommand
+MachO::segment_command
MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const {
- return getStruct<macho::SegmentLoadCommand>(this, L.Ptr);
+ return getStruct<MachO::segment_command>(this, L.Ptr);
}
-macho::Segment64LoadCommand
+MachO::segment_command_64
MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const {
- return getStruct<macho::Segment64LoadCommand>(this, L.Ptr);
+ return getStruct<MachO::segment_command_64>(this, L.Ptr);
}
-macho::LinkerOptionsLoadCommand
+MachO::linker_options_command
MachOObjectFile::getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const {
- return getStruct<macho::LinkerOptionsLoadCommand>(this, L.Ptr);
+ return getStruct<MachO::linker_options_command>(this, L.Ptr);
}
-macho::RelocationEntry
+MachO::any_relocation_info
MachOObjectFile::getRelocation(DataRefImpl Rel) const {
const char *P = reinterpret_cast<const char *>(Rel.p);
- return getStruct<macho::RelocationEntry>(this, P);
+ return getStruct<MachO::any_relocation_info>(this, P);
}
-macho::DataInCodeTableEntry
+MachO::data_in_code_entry
MachOObjectFile::getDice(DataRefImpl Rel) const {
const char *P = reinterpret_cast<const char *>(Rel.p);
- return getStruct<macho::DataInCodeTableEntry>(this, P);
+ return getStruct<MachO::data_in_code_entry>(this, P);
}
-macho::Header MachOObjectFile::getHeader() const {
- return getStruct<macho::Header>(this, getPtr(this, 0));
+MachO::mach_header MachOObjectFile::getHeader() const {
+ return getStruct<MachO::mach_header>(this, getPtr(this, 0));
}
-macho::Header64Ext MachOObjectFile::getHeader64Ext() const {
- return
- getStruct<macho::Header64Ext>(this, getPtr(this, sizeof(macho::Header)));
+MachO::mach_header_64 MachOObjectFile::getHeader64() const {
+ return getStruct<MachO::mach_header_64>(this, getPtr(this, 0));
}
-macho::IndirectSymbolTableEntry MachOObjectFile::getIndirectSymbolTableEntry(
- const macho::DysymtabLoadCommand &DLC,
- unsigned Index) const {
- uint64_t Offset = DLC.IndirectSymbolTableOffset +
- Index * sizeof(macho::IndirectSymbolTableEntry);
- return getStruct<macho::IndirectSymbolTableEntry>(this, getPtr(this, Offset));
+uint32_t MachOObjectFile::getIndirectSymbolTableEntry(
+ const MachO::dysymtab_command &DLC,
+ unsigned Index) const {
+ uint64_t Offset = DLC.indirectsymoff + Index * sizeof(uint32_t);
+ return getStruct<uint32_t>(this, getPtr(this, Offset));
}
-macho::DataInCodeTableEntry
+MachO::data_in_code_entry
MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset,
unsigned Index) const {
- uint64_t Offset = DataOffset + Index * sizeof(macho::DataInCodeTableEntry);
- return getStruct<macho::DataInCodeTableEntry>(this, getPtr(this, Offset));
+ uint64_t Offset = DataOffset + Index * sizeof(MachO::data_in_code_entry);
+ return getStruct<MachO::data_in_code_entry>(this, getPtr(this, Offset));
}
-macho::SymtabLoadCommand MachOObjectFile::getSymtabLoadCommand() const {
- return getStruct<macho::SymtabLoadCommand>(this, SymtabLoadCmd);
+MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const {
+ return getStruct<MachO::symtab_command>(this, SymtabLoadCmd);
}
-macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const {
- return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
+MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const {
+ return getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd);
}
-macho::LinkeditDataLoadCommand
+MachO::linkedit_data_command
MachOObjectFile::getDataInCodeLoadCommand() const {
if (DataInCodeLoadCmd)
- return getStruct<macho::LinkeditDataLoadCommand>(this, DataInCodeLoadCmd);
+ return getStruct<MachO::linkedit_data_command>(this, DataInCodeLoadCmd);
// If there is no DataInCodeLoadCmd return a load command with zero'ed fields.
- macho::LinkeditDataLoadCommand Cmd;
- Cmd.Type = macho::LCT_DataInCode;
- Cmd.Size = macho::LinkeditLoadCommandSize;
- Cmd.DataOffset = 0;
- Cmd.DataSize = 0;
+ MachO::linkedit_data_command Cmd;
+ Cmd.cmd = MachO::LC_DATA_IN_CODE;
+ Cmd.cmdsize = sizeof(MachO::linkedit_data_command);
+ Cmd.dataoff = 0;
+ Cmd.datasize = 0;
return Cmd;
}
StringRef MachOObjectFile::getStringTableData() const {
- macho::SymtabLoadCommand S = getSymtabLoadCommand();
- return getData().substr(S.StringTableOffset, S.StringTableSize);
+ MachO::symtab_command S = getSymtabLoadCommand();
+ return getData().substr(S.stroff, S.strsize);
}
bool MachOObjectFile::is64Bit() const {
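The bulk of the MachOObjectFile.cpp patch above replaces LLVM's old macho::* wrapper structs with MachO::* structs whose field names match <mach-o/loader.h>, and each SwapStruct specialization byte-swaps only the endian-sensitive fields of one struct. Below is a minimal standalone sketch of that pattern; nlist_base_example, swap16, swap32 and swapStructExample are illustrative names standing in for the patch's nlist_base and SwapValue helpers, not LLVM code.

#include <cstdint>

struct nlist_base_example {   // same shape as the patch's nlist_base
  uint32_t n_strx;
  uint8_t  n_type;
  uint8_t  n_sect;
  uint16_t n_desc;
};

static uint16_t swap16(uint16_t V) { return uint16_t((V >> 8) | (V << 8)); }
static uint32_t swap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00u) | ((V << 8) & 0xff0000u) | (V << 24);
}

// Only the multi-byte fields change with byte order; the single-byte n_type
// and n_sect are left untouched, just like SwapStruct(nlist_base &) above.
static void swapStructExample(nlist_base_example &S) {
  S.n_strx = swap32(S.n_strx);
  S.n_desc = swap16(S.n_desc);
}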
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index b76f10e..75160af 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -31,18 +31,18 @@ template<typename T>
static void SwapStruct(T &Value);
template<>
-void SwapStruct(macho::FatHeader &H) {
- SwapValue(H.Magic);
- SwapValue(H.NumFatArch);
+void SwapStruct(MachO::fat_header &H) {
+ SwapValue(H.magic);
+ SwapValue(H.nfat_arch);
}
template<>
-void SwapStruct(macho::FatArchHeader &H) {
- SwapValue(H.CPUType);
- SwapValue(H.CPUSubtype);
- SwapValue(H.Offset);
- SwapValue(H.Size);
- SwapValue(H.Align);
+void SwapStruct(MachO::fat_arch &H) {
+ SwapValue(H.cputype);
+ SwapValue(H.cpusubtype);
+ SwapValue(H.offset);
+ SwapValue(H.size);
+ SwapValue(H.align);
}
template<typename T>
@@ -63,10 +63,10 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch(
} else {
// Parse object header.
StringRef ParentData = Parent->getData();
- const char *HeaderPos = ParentData.begin() + macho::FatHeaderSize +
- Index * macho::FatArchHeaderSize;
- Header = getUniversalBinaryStruct<macho::FatArchHeader>(HeaderPos);
- if (ParentData.size() < Header.Offset + Header.Size) {
+ const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) +
+ Index * sizeof(MachO::fat_arch);
+ Header = getUniversalBinaryStruct<MachO::fat_arch>(HeaderPos);
+ if (ParentData.size() < Header.offset + Header.size) {
clear();
}
}
@@ -76,10 +76,10 @@ error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile(
OwningPtr<ObjectFile> &Result) const {
if (Parent) {
StringRef ParentData = Parent->getData();
- StringRef ObjectData = ParentData.substr(Header.Offset, Header.Size);
+ StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
std::string ObjectName =
Parent->getFileName().str() + ":" +
- Triple::getArchTypeName(MachOObjectFile::getArch(Header.CPUType));
+ Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype));
MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer(
ObjectData, ObjectName, false);
if (ObjectFile *Obj = ObjectFile::createMachOObjectFile(ObjBuffer)) {
@@ -96,31 +96,31 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBuffer *Source,
error_code &ec)
: Binary(Binary::ID_MachOUniversalBinary, Source),
NumberOfObjects(0) {
- if (Source->getBufferSize() < macho::FatHeaderSize) {
+ if (Source->getBufferSize() < sizeof(MachO::fat_header)) {
ec = object_error::invalid_file_type;
return;
}
// Check for magic value and sufficient header size.
StringRef Buf = getData();
- macho::FatHeader H = getUniversalBinaryStruct<macho::FatHeader>(Buf.begin());
- NumberOfObjects = H.NumFatArch;
- uint32_t MinSize = macho::FatHeaderSize +
- macho::FatArchHeaderSize * NumberOfObjects;
- if (H.Magic != macho::HM_Universal || Buf.size() < MinSize) {
+ MachO::fat_header H= getUniversalBinaryStruct<MachO::fat_header>(Buf.begin());
+ NumberOfObjects = H.nfat_arch;
+ uint32_t MinSize = sizeof(MachO::fat_header) +
+ sizeof(MachO::fat_arch) * NumberOfObjects;
+ if (H.magic != MachO::FAT_MAGIC || Buf.size() < MinSize) {
ec = object_error::parse_failed;
return;
}
ec = object_error::success;
}
-static bool getCTMForArch(Triple::ArchType Arch, mach::CPUTypeMachine &CTM) {
+static bool getCTMForArch(Triple::ArchType Arch, MachO::CPUType &CTM) {
switch (Arch) {
- case Triple::x86: CTM = mach::CTM_i386; return true;
- case Triple::x86_64: CTM = mach::CTM_x86_64; return true;
- case Triple::arm: CTM = mach::CTM_ARM; return true;
- case Triple::sparc: CTM = mach::CTM_SPARC; return true;
- case Triple::ppc: CTM = mach::CTM_PowerPC; return true;
- case Triple::ppc64: CTM = mach::CTM_PowerPC64; return true;
+ case Triple::x86: CTM = MachO::CPU_TYPE_I386; return true;
+ case Triple::x86_64: CTM = MachO::CPU_TYPE_X86_64; return true;
+ case Triple::arm: CTM = MachO::CPU_TYPE_ARM; return true;
+ case Triple::sparc: CTM = MachO::CPU_TYPE_SPARC; return true;
+ case Triple::ppc: CTM = MachO::CPU_TYPE_POWERPC; return true;
+ case Triple::ppc64: CTM = MachO::CPU_TYPE_POWERPC64; return true;
default: return false;
}
}
@@ -128,7 +128,7 @@ static bool getCTMForArch(Triple::ArchType Arch, mach::CPUTypeMachine &CTM) {
error_code
MachOUniversalBinary::getObjectForArch(Triple::ArchType Arch,
OwningPtr<ObjectFile> &Result) const {
- mach::CPUTypeMachine CTM;
+ MachO::CPUType CTM;
if (!getCTMForArch(Arch, CTM))
return object_error::arch_not_found;
for (object_iterator I = begin_objects(), E = end_objects(); I != E; ++I) {
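As a quick check of the per-arch header arithmetic in the ObjectForArch constructor above, using the standard Mach-O layout rather than anything specific to this patch: sizeof(MachO::fat_header) is 8 bytes (magic plus nfat_arch) and sizeof(MachO::fat_arch) is 20 bytes (five 32-bit fields), so for Index = 2 the per-arch header is read from offset 8 + 2 * 20 = 48 into the universal binary.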
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 1d1dafd..0e626d6 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -49,6 +49,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
case sys::fs::file_magic::bitcode:
case sys::fs::file_magic::archive:
case sys::fs::file_magic::macho_universal_binary:
+ case sys::fs::file_magic::windows_resource:
delete Object;
return 0;
case sys::fs::file_magic::elf_relocatable:
@@ -68,6 +69,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
case sys::fs::file_magic::macho_dsym_companion:
return createMachOObjectFile(Object);
case sys::fs::file_magic::coff_object:
+ case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
return createCOFFObjectFile(Object);
}
diff --git a/lib/Object/YAML.cpp b/lib/Object/YAML.cpp
index 21bacb8..c527bde 100644
--- a/lib/Object/YAML.cpp
+++ b/lib/Object/YAML.cpp
@@ -15,6 +15,7 @@
#include "llvm/Object/YAML.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cctype>
using namespace llvm;
using namespace object::yaml;
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index 98e63bc..6fa459a 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -14,26 +14,27 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cctype>
#include <map>
using namespace llvm;
using namespace llvm::opt;
-// Ordering on Info. The ordering is *almost* lexicographic, with two
-// exceptions. First, '\0' comes at the end of the alphabet instead of
-// the beginning (thus options precede any other options which prefix
-// them). Second, for options with the same name, the less permissive
-// version should come first; a Flag option should precede a Joined
-// option, for example.
+namespace llvm {
+namespace opt {
-static int StrCmpOptionName(const char *A, const char *B) {
- char a = *A, b = *B;
+// Ordering on Info. The ordering is *almost* case-insensitive lexicographic,
+// with one exception: '\0' comes at the end of the alphabet instead of the
+// beginning (thus options precede any other options which prefix them).
+static int StrCmpOptionNameIgnoreCase(const char *A, const char *B) {
+ const char *X = A, *Y = B;
+ char a = tolower(*A), b = tolower(*B);
while (a == b) {
if (a == '\0')
return 0;
- a = *++A;
- b = *++B;
+ a = tolower(*++X);
+ b = tolower(*++Y);
}
if (a == '\0') // A is a prefix of B.
@@ -45,21 +46,25 @@ static int StrCmpOptionName(const char *A, const char *B) {
return (a < b) ? -1 : 1;
}
-namespace llvm {
-namespace opt {
+#ifndef NDEBUG
+static int StrCmpOptionName(const char *A, const char *B) {
+ if (int N = StrCmpOptionNameIgnoreCase(A, B))
+ return N;
+ return strcmp(A, B);
+}
static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
if (&A == &B)
return false;
if (int N = StrCmpOptionName(A.Name, B.Name))
- return N == -1;
+ return N < 0;
for (const char * const *APre = A.Prefixes,
* const *BPre = B.Prefixes;
*APre != 0 && *BPre != 0; ++APre, ++BPre) {
if (int N = StrCmpOptionName(*APre, *BPre))
- return N == -1;
+ return N < 0;
}
// Names are the same, check that classes are in order; exactly one
@@ -68,22 +73,22 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
"Unexpected classes for options with same name.");
return B.Kind == Option::JoinedClass;
}
+#endif
// Support lower_bound between info and an option name.
static inline bool operator<(const OptTable::Info &I, const char *Name) {
- return StrCmpOptionName(I.Name, Name) == -1;
-}
-static inline bool operator<(const char *Name, const OptTable::Info &I) {
- return StrCmpOptionName(Name, I.Name) == -1;
+ return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0;
}
}
}
OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
-OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos)
+OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos,
+ bool _IgnoreCase)
: OptionInfos(_OptionInfos),
NumOptionInfos(_NumOptionInfos),
+ IgnoreCase(_IgnoreCase),
TheInputOptionID(0),
TheUnknownOptionID(0),
FirstSearchableIndex(0)
@@ -171,11 +176,18 @@ static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) {
}
/// \returns Matched size. 0 means no match.
-static unsigned matchOption(const OptTable::Info *I, StringRef Str) {
+static unsigned matchOption(const OptTable::Info *I, StringRef Str,
+ bool IgnoreCase) {
for (const char * const *Pre = I->Prefixes; *Pre != 0; ++Pre) {
StringRef Prefix(*Pre);
- if (Str.startswith(Prefix) && Str.substr(Prefix.size()).startswith(I->Name))
- return Prefix.size() + StringRef(I->Name).size();
+ if (Str.startswith(Prefix)) {
+ StringRef Rest = Str.substr(Prefix.size());
+ bool Matched = IgnoreCase
+ ? Rest.startswith_lower(I->Name)
+ : Rest.startswith(I->Name);
+ if (Matched)
+ return Prefix.size() + StringRef(I->Name).size();
+ }
}
return 0;
}
@@ -210,7 +222,7 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
unsigned ArgSize = 0;
// Scan for first option which is a proper prefix.
for (; Start != End; ++Start)
- if ((ArgSize = matchOption(Start, Str)))
+ if ((ArgSize = matchOption(Start, Str, IgnoreCase)))
break;
if (Start == End)
break;
@@ -259,20 +271,6 @@ InputArgList *OptTable::ParseArgs(const char *const *ArgBegin,
continue;
}
- if (Str == "--") {
- // Everything after -- is a filename.
- ++Index;
-
- assert(TheInputOptionID != 0 && "Invalid input option ID.");
- while (Index < End) {
- Args->append(new Arg(getOption(TheInputOptionID),
- Args->getArgString(Index), Index,
- Args->getArgString(Index)));
- ++Index;
- }
- break;
- }
-
unsigned Prev = Index;
Arg *A = ParseOneArg(*Args, Index, FlagsToInclude, FlagsToExclude);
assert(Index > Prev && "Parser failed to consume argument.");
@@ -308,6 +306,7 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
break;
case Option::SeparateClass: case Option::JoinedOrSeparateClass:
+ case Option::RemainingArgsClass:
Name += ' ';
// FALLTHROUGH
case Option::JoinedClass: case Option::CommaJoinedClass:
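For illustration, a minimal standalone sketch of the prefix-plus-name matching that matchOption now performs when IgnoreCase is set; prefixes stay case-sensitive and only the option name gets the lowered comparison (the helper names here are made up, not part of the patch):

#include <cctype>
#include <string>

// Case-insensitive "starts with", in the spirit of StringRef::startswith_lower.
static bool startsWithLower(const std::string &Str, const std::string &Prefix) {
  if (Str.size() < Prefix.size())
    return false;
  for (size_t I = 0; I != Prefix.size(); ++I)
    if (std::tolower((unsigned char)Str[I]) !=
        std::tolower((unsigned char)Prefix[I]))
      return false;
  return true;
}

// Returns the number of characters matched (prefix + name), or 0 on no match.
static unsigned matchSpelling(const std::string &Arg, const std::string &Prefix,
                              const std::string &Name, bool IgnoreCase) {
  if (Arg.compare(0, Prefix.size(), Prefix) != 0) // prefix stays case-sensitive
    return 0;
  std::string Rest = Arg.substr(Prefix.size());
  bool Matched = IgnoreCase ? startsWithLower(Rest, Name)
                            : Rest.compare(0, Name.size(), Name) == 0;
  return Matched ? unsigned(Prefix.size() + Name.size()) : 0;
}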
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index 1d6a3d3..7b5ff2b 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -52,6 +52,7 @@ void Option::dump() const {
P(MultiArgClass);
P(JoinedOrSeparateClass);
P(JoinedAndSeparateClass);
+ P(RemainingArgsClass);
#undef P
}
@@ -214,6 +215,16 @@ Arg *Option::accept(const ArgList &Args,
return new Arg(UnaliasedOption, Spelling, Index - 2,
Args.getArgString(Index - 2) + ArgSize,
Args.getArgString(Index - 1));
+ case RemainingArgsClass: {
+ // Matches iff this is an exact match.
+ // FIXME: Avoid strlen.
+ if (ArgSize != strlen(Args.getArgString(Index)))
+ return 0;
+ Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
+ while (Index < Args.getNumInputArgStrings())
+ A->getValues().push_back(Args.getArgString(Index++));
+ return A;
+ }
default:
llvm_unreachable("Invalid option kind!");
}
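A rough standalone sketch of the RemainingArgsClass behaviour accepted above: the spelling must match the whole argument exactly, and every following argv entry becomes one of the option's values (the "--args" spelling below is hypothetical):

#include <string>
#include <vector>

struct ParsedArg {
  std::string Spelling;
  std::vector<std::string> Values;
};

// Consume "--args" and everything after it, mirroring RemainingArgsClass.
static bool acceptRemainingArgs(const std::vector<std::string> &Argv,
                                size_t &Index, ParsedArg &Out) {
  if (Argv[Index] != "--args") // exact match required, no joined value allowed
    return false;
  Out.Spelling = Argv[Index++];
  while (Index < Argv.size())
    Out.Values.push_back(Argv[Index++]);
  return true;
}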
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 34bc6b6..676e2d4 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -3546,11 +3546,14 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
// Set FormatPrecision if zero. We want to do this before we
// truncate trailing zeros, as those are part of the precision.
if (!FormatPrecision) {
- // It's an interesting question whether to use the nominal
- // precision or the active precision here for denormals.
-
- // FormatPrecision = ceil(significandBits / lg_2(10))
- FormatPrecision = (semantics->precision * 59 + 195) / 196;
+ // We use enough digits so the number can be round-tripped back to an
+ // APFloat. The formula comes from "How to Print Floating-Point Numbers
+ // Accurately" by Steele and White.
+ // FIXME: Using a formula based purely on the precision is conservative;
+ // we can print fewer digits depending on the actual value being printed.
+
+ // FormatPrecision = 2 + floor(significandBits / lg_2(10))
+ FormatPrecision = 2 + semantics->precision * 59 / 196;
}
// Ignore trailing binary zeros.
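A quick sanity check of the new formula: 59/196 ≈ log10(2), so this computes 2 + floor(significandBits · log10(2)). Feeding in the standard IEEE significand widths reproduces the familiar round-trip digit counts:

#include <cstdio>

int main() {
  // Significand widths for IEEE single, double, x87 extended and quad.
  const unsigned Prec[4] = { 24, 53, 64, 113 };
  for (unsigned i = 0; i != 4; ++i)
    std::printf("%3u significand bits -> %u decimal digits\n",
                Prec[i], 2 + Prec[i] * 59 / 196);
  // Prints 9, 17, 21 and 36 respectively.
  return 0;
}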
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 3c4191b..6e7a541 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -26,6 +26,10 @@ BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold,
: SlabSize(size), SizeThreshold(std::min(size, threshold)),
Allocator(allocator), CurSlab(0), BytesAllocated(0) { }
+BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold)
+ : SlabSize(size), SizeThreshold(std::min(size, threshold)),
+ Allocator(DefaultSlabAllocator), CurSlab(0), BytesAllocated(0) { }
+
BumpPtrAllocator::~BumpPtrAllocator() {
DeallocateSlabs(CurSlab);
}
@@ -167,9 +171,6 @@ void BumpPtrAllocator::PrintStats() const {
<< " (includes alignment, etc)\n";
}
-MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator =
- MallocSlabAllocator();
-
SlabAllocator::~SlabAllocator() { }
MallocSlabAllocator::~MallocSlabAllocator() { }
diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp
index 5e45e46..00efe90 100644
--- a/lib/Support/BlockFrequency.cpp
+++ b/lib/Support/BlockFrequency.cpp
@@ -19,52 +19,69 @@
using namespace llvm;
/// Multiply FREQ by N and store result in W array.
-static void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) {
+static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) {
uint64_t u0 = freq & UINT32_MAX;
uint64_t u1 = freq >> 32;
- // Represent 96-bit value as w[2]:w[1]:w[0];
- uint32_t w[3] = { 0, 0, 0 };
-
+ // Represent 96-bit value as W[2]:W[1]:W[0];
uint64_t t = u0 * N;
uint64_t k = t >> 32;
- w[0] = t;
+ W[0] = t;
t = u1 * N + k;
- w[1] = t;
- w[2] = t >> 32;
-
- // W[1] - higher bits.
- // W[0] - lower bits.
- W[0] = w[0] + ((uint64_t) w[1] << 32);
- W[1] = w[2];
+ W[1] = t;
+ W[2] = t >> 32;
}
-
-/// Divide 96-bit value stored in W array by D.
-/// Return 64-bit quotient, saturated to UINT64_MAX on overflow.
-static uint64_t div96bit(uint64_t W[2], uint32_t D) {
- uint64_t y = W[0];
- uint64_t x = W[1];
- unsigned i;
-
- assert(x != 0 && "This is really a 64-bit division");
-
- // This long division algorithm automatically saturates on overflow.
- for (i = 0; i < 64 && x; ++i) {
- uint32_t t = -((x >> 31) & 1); // Splat bit 31 to bits 0-31.
- x = (x << 1) | (y >> 63);
- y = y << 1;
- if ((x | t) >= D) {
- x -= D;
- ++y;
+/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a
+/// 32 bit unsigned integer, we can use a short division algorithm.
+static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) {
+ // We assume that W[2] is non-zero; if it were zero, the caller should just
+ // use 64/32 hardware division instead.
+ assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is "
+ "zero, the caller should just use 64/32 hardware.");
+ uint32_t Q[3] = { 0, 0, 0 };
+
+ // The generalized short division algorithm sets i to m + n - 1, where n is
+ // the number of words in the divisor and m is the number of words by which
+ // the dividend exceeds the divisor (i.e. m + n == the length of the dividend
+ // in words). Due to our assumption that W[2] is non-zero, we know that the
+ // dividend is of length 3, implying, since n is 1, that m = 2. Thus we set i
+ // to m + n - 1 = 2 + 1 - 1 = 2.
+ uint32_t R = 0;
+ for (int i = 2; i >= 0; --i) {
+ uint64_t PartialD = uint64_t(R) << 32 | W[i];
+ if (PartialD == 0) {
+ Q[i] = 0;
+ R = 0;
+ } else if (PartialD < D) {
+ Q[i] = 0;
+ R = uint32_t(PartialD);
+ } else if (PartialD == D) {
+ Q[i] = 1;
+ R = 0;
+ } else {
+ Q[i] = uint32_t(PartialD / D);
+ R = uint32_t(PartialD - (Q[i] * D));
}
}
- return y << (64 - i);
-}
+ // If Q[2] is non-zero, then we overflowed.
+ uint64_t Result;
+ if (Q[2]) {
+ Result = UINT64_MAX;
+ R = D;
+ } else {
+ // Form the final uint64_t result, avoiding endianness issues.
+ Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32);
+ }
+
+ if (Rout)
+ *Rout = R;
+ return Result;
+}
-void BlockFrequency::scale(uint32_t N, uint32_t D) {
+uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) {
assert(D != 0 && "Division by zero");
// Calculate Frequency * N.
@@ -75,15 +92,16 @@ void BlockFrequency::scale(uint32_t N, uint32_t D) {
// If the product fits in 64 bits, just use built-in division.
if (MulHi <= UINT32_MAX && MulRes >= MulLo) {
Frequency = MulRes / D;
- return;
+ return MulRes % D;
}
// Product overflowed, use 96-bit operations.
- // 96-bit value represented as W[1]:W[0].
- uint64_t W[2];
+ // 96-bit value represented as W[2]:W[1]:W[0].
+ uint32_t W[3];
+ uint32_t R;
mult96bit(Frequency, N, W);
- Frequency = div96bit(W, D);
- return;
+ Frequency = divrem96bit(W, D, &R);
+ return R;
}
BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
@@ -127,6 +145,10 @@ BlockFrequency::operator+(const BlockFrequency &Prob) const {
return Freq;
}
+uint32_t BlockFrequency::scale(const BranchProbability &Prob) {
+ return scale(Prob.getNumerator(), Prob.getDenominator());
+}
+
void BlockFrequency::print(raw_ostream &OS) const {
// Convert fixed-point number to decimal.
OS << Frequency / getEntryFrequency() << ".";
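The short-division technique used by divrem96bit, reduced to a generic standalone helper (word array in little-endian order, divided in place by a 32-bit divisor):

#include <cassert>
#include <stdint.h>

// Divide W[NumWords-1]:...:W[0] by D in place, most significant word first;
// return the remainder.
static uint32_t shortDivide(uint32_t *W, int NumWords, uint32_t D) {
  assert(D != 0 && "Division by zero");
  uint32_t R = 0;
  for (int i = NumWords - 1; i >= 0; --i) {
    uint64_t Partial = (uint64_t(R) << 32) | W[i];
    W[i] = uint32_t(Partial / D);
    R = uint32_t(Partial % D);
  }
  return R;
}

After the call W holds the quotient, so an overflow check is just a test that the top word is non-zero, exactly as divrem96bit does before forming the 64-bit result.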
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 5823836..3aecf3f 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_library(LLVMSupport
ToolOutputFile.cpp
Triple.cpp
Twine.cpp
+ Unicode.cpp
YAMLParser.cpp
YAMLTraits.cpp
raw_os_ostream.cpp
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index a47af27..44a88d8 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -60,6 +60,8 @@ TEMPLATE_INSTANTIATION(class opt<char>);
TEMPLATE_INSTANTIATION(class opt<bool>);
} } // end namespace llvm::cl
+// Pin the vtables to this file.
+void GenericOptionValue::anchor() {}
void OptionValue<boolOrDefault>::anchor() {}
void OptionValue<std::string>::anchor() {}
void Option::anchor() {}
@@ -73,6 +75,7 @@ void parser<double>::anchor() {}
void parser<float>::anchor() {}
void parser<std::string>::anchor() {}
void parser<char>::anchor() {}
+void StringSaver::anchor() {}
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp
index fd8a874..b5ddb70 100644
--- a/lib/Support/Compression.cpp
+++ b/lib/Support/Compression.cpp
@@ -81,6 +81,10 @@ zlib::Status zlib::uncompress(StringRef InputBuffer,
return Res;
}
+uint32_t zlib::crc32(StringRef Buffer) {
+ return ::crc32(0, (const Bytef *)Buffer.data(), Buffer.size());
+}
+
#else
bool zlib::isAvailable() { return false; }
zlib::Status zlib::compress(StringRef InputBuffer,
@@ -93,5 +97,8 @@ zlib::Status zlib::uncompress(StringRef InputBuffer,
size_t UncompressedSize) {
return zlib::StatusUnsupported;
}
+uint32_t zlib::crc32(StringRef Buffer) {
+ llvm_unreachable("zlib::crc32 is unavailable");
+}
#endif
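A minimal usage sketch for the new checksum helper, assuming LLVM was configured with zlib (the stub built without zlib hits llvm_unreachable):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/raw_ostream.h"

static void printChecksum(llvm::StringRef Data) {
  if (!llvm::zlib::isAvailable())
    return; // the no-zlib stub of crc32 is unreachable
  llvm::outs() << "crc32: " << llvm::zlib::crc32(Data) << "\n";
}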
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index bb38cd1..265b6e9 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -144,9 +144,6 @@ bool ConstantRange::isSignWrappedSet() const {
/// getSetSize - Return the number of elements in this set.
///
APInt ConstantRange::getSetSize() const {
- if (isEmptySet())
- return APInt(getBitWidth()+1, 0);
-
if (isFullSet()) {
APInt Size(getBitWidth()+1, 0);
Size.setBit(getBitWidth());
@@ -448,6 +445,11 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
unsigned SrcTySize = getBitWidth();
assert(SrcTySize < DstTySize && "Not a value extension");
+
+ // special case: [X, INT_MIN) -- not really wrapping around
+ if (Upper.isMinSignedValue())
+ return ConstantRange(Lower.sext(DstTySize), Upper.zext(DstTySize));
+
if (isFullSet() || isSignWrappedSet()) {
return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1),
APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1);
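As a concrete instance of the new special case: in i8 the range [100, 0x80) denotes {100, ..., 127}. It is not sign-wrapped, so the old code fell through to the generic path and sign-extended both bounds, giving [100, 0xFF80) in i16, a drastic over-approximation; with the Upper.isMinSignedValue() check the result is [Lower.sext(16), Upper.zext(16)) = [100, 128), i.e. exactly the same set.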
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index d2a3895..92c370d 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/ThreadLocal.h"
#include <cstdio>
@@ -21,7 +22,7 @@ namespace {
struct CrashRecoveryContextImpl;
-static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext;
+static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext;
struct CrashRecoveryContextImpl {
CrashRecoveryContext *CRC;
@@ -34,11 +35,11 @@ public:
CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
Failed(false),
SwitchedThread(false) {
- CurrentContext.set(this);
+ CurrentContext->set(this);
}
~CrashRecoveryContextImpl() {
if (!SwitchedThread)
- CurrentContext.erase();
+ CurrentContext->erase();
}
/// \brief Called when the separate crash-recovery thread was finished, to
@@ -48,7 +49,7 @@ public:
void HandleCrash() {
// Eliminate the current context entry, to avoid re-entering in case the
// cleanup code crashes.
- CurrentContext.erase();
+ CurrentContext->erase();
assert(!Failed && "Crash recovery context already failed!");
Failed = true;
@@ -62,10 +63,10 @@ public:
}
-static sys::Mutex gCrashRecoveryContexMutex;
+static ManagedStatic<sys::Mutex> gCrashRecoveryContextMutex;
static bool gCrashRecoveryEnabled = false;
-static sys::ThreadLocal<const CrashRecoveryContextCleanup>
+static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContextCleanup> >
tlIsRecoveringFromCrash;
CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
@@ -73,7 +74,7 @@ CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
CrashRecoveryContext::~CrashRecoveryContext() {
// Reclaim registered resources.
CrashRecoveryContextCleanup *i = head;
- tlIsRecoveringFromCrash.set(head);
+ tlIsRecoveringFromCrash->set(head);
while (i) {
CrashRecoveryContextCleanup *tmp = i;
i = tmp->next;
@@ -81,21 +82,21 @@ CrashRecoveryContext::~CrashRecoveryContext() {
tmp->recoverResources();
delete tmp;
}
- tlIsRecoveringFromCrash.erase();
+ tlIsRecoveringFromCrash->erase();
CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
delete CRCI;
}
bool CrashRecoveryContext::isRecoveringFromCrash() {
- return tlIsRecoveringFromCrash.get() != 0;
+ return tlIsRecoveringFromCrash->get() != 0;
}
CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
if (!gCrashRecoveryEnabled)
return 0;
- const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+ const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
if (!CRCI)
return 0;
@@ -154,7 +155,7 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
{
// Lookup the current thread local recovery object.
- const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+ const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
if (!CRCI) {
// Something has gone horribly wrong, so let's just tell everyone
@@ -182,7 +183,7 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
static sys::ThreadLocal<const void> sCurrentExceptionHandle;
void CrashRecoveryContext::Enable() {
- sys::ScopedLock L(gCrashRecoveryContexMutex);
+ sys::ScopedLock L(*gCrashRecoveryContextMutex);
if (gCrashRecoveryEnabled)
return;
@@ -198,7 +199,7 @@ void CrashRecoveryContext::Enable() {
}
void CrashRecoveryContext::Disable() {
- sys::ScopedLock L(gCrashRecoveryContexMutex);
+ sys::ScopedLock L(*gCrashRecoveryContextMutex);
if (!gCrashRecoveryEnabled)
return;
@@ -236,7 +237,7 @@ static struct sigaction PrevActions[NumSignals];
static void CrashRecoverySignalHandler(int Signal) {
// Lookup the current thread local recovery object.
- const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+ const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
if (!CRCI) {
// We didn't find a crash recovery context -- this means either we got a
@@ -267,7 +268,7 @@ static void CrashRecoverySignalHandler(int Signal) {
}
void CrashRecoveryContext::Enable() {
- sys::ScopedLock L(gCrashRecoveryContexMutex);
+ sys::ScopedLock L(*gCrashRecoveryContextMutex);
if (gCrashRecoveryEnabled)
return;
@@ -286,7 +287,7 @@ void CrashRecoveryContext::Enable() {
}
void CrashRecoveryContext::Disable() {
- sys::ScopedLock L(gCrashRecoveryContexMutex);
+ sys::ScopedLock L(*gCrashRecoveryContextMutex);
if (!gCrashRecoveryEnabled)
return;
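The conversion pattern applied here (and to DynamicLibrary and PrettyStackTrace below), sketched in isolation: a ManagedStatic is constructed lazily on first dereference and destroyed by llvm::llvm_shutdown(), which sidesteps static construction and destruction order problems for these globals (the global name below is hypothetical):

#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"

// Hypothetical global, following the pattern introduced above.
static llvm::ManagedStatic<llvm::sys::Mutex> SomeMutex;

static void doWorkLocked() {
  // First dereference constructs the mutex; nothing runs at static-init time.
  llvm::sys::ScopedLock Lock(*SomeMutex);
  // ... critical section ...
}

// llvm::llvm_shutdown() later destroys every ManagedStatic in reverse order.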
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 8a80139..c000b63 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+
using namespace llvm;
using namespace dwarf;
@@ -59,8 +61,8 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
case DW_TAG_namelist_item: return "DW_TAG_namelist_item";
case DW_TAG_packed_type: return "DW_TAG_packed_type";
case DW_TAG_subprogram: return "DW_TAG_subprogram";
- case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter";
- case DW_TAG_template_value_parameter:return "DW_TAG_template_value_parameter";
+ case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter";
+ case DW_TAG_template_value_parameter: return "DW_TAG_template_value_parameter";
case DW_TAG_thrown_type: return "DW_TAG_thrown_type";
case DW_TAG_try_block: return "DW_TAG_try_block";
case DW_TAG_variant_part: return "DW_TAG_variant_part";
@@ -454,10 +456,11 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
case DW_OP_bit_piece: return "DW_OP_bit_piece";
case DW_OP_implicit_value: return "DW_OP_implicit_value";
case DW_OP_stack_value: return "DW_OP_stack_value";
- case DW_OP_lo_user: return "DW_OP_lo_user";
- case DW_OP_hi_user: return "DW_OP_hi_user";
- // DWARF5 Fission Proposal Op Extensions
+ // GNU thread-local storage
+ case DW_OP_GNU_push_tls_address: return "DW_OP_GNU_push_tls_address";
+
+ // DWARF5 Fission Proposal Op Extensions
case DW_OP_GNU_addr_index: return "DW_OP_GNU_addr_index";
case DW_OP_GNU_const_index: return "DW_OP_GNU_const_index";
}
@@ -723,3 +726,51 @@ const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
}
return 0;
}
+
+const char *llvm::dwarf::AtomTypeString(unsigned AT) {
+ switch (AT) {
+ case dwarf::DW_ATOM_null:
+ return "DW_ATOM_null";
+ case dwarf::DW_ATOM_die_offset:
+ return "DW_ATOM_die_offset";
+ case DW_ATOM_cu_offset:
+ return "DW_ATOM_cu_offset";
+ case DW_ATOM_die_tag:
+ return "DW_ATOM_die_tag";
+ case DW_ATOM_type_flags:
+ return "DW_ATOM_type_flags";
+ }
+ return 0;
+}
+
+const char *llvm::dwarf::GDBIndexEntryKindString(GDBIndexEntryKind Kind) {
+ switch (Kind) {
+ case GIEK_NONE:
+ return "NONE";
+ case GIEK_TYPE:
+ return "TYPE";
+ case GIEK_VARIABLE:
+ return "VARIABLE";
+ case GIEK_FUNCTION:
+ return "FUNCTION";
+ case GIEK_OTHER:
+ return "OTHER";
+ case GIEK_UNUSED5:
+ return "UNUSED5";
+ case GIEK_UNUSED6:
+ return "UNUSED6";
+ case GIEK_UNUSED7:
+ return "UNUSED7";
+ }
+ llvm_unreachable("Unknown GDBIndexEntryKind value");
+}
+
+const char *llvm::dwarf::GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage) {
+ switch (Linkage) {
+ case GIEL_EXTERNAL:
+ return "EXTERNAL";
+ case GIEL_STATIC:
+ return "STATIC";
+ }
+ llvm_unreachable("Unknown GDBIndexEntryLinkage value");
+}
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index f14cb45..a825c68 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -14,39 +14,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Mutex.h"
+#include "llvm-c/Support.h"
#include <cstdio>
#include <cstring>
// Collection of symbol name/value pairs to be searched prior to any libraries.
-static llvm::StringMap<void *> *ExplicitSymbols = 0;
-
-namespace {
-
-struct ExplicitSymbolsDeleter {
- ~ExplicitSymbolsDeleter() {
- delete ExplicitSymbols;
- }
-};
-
-}
-
-static ExplicitSymbolsDeleter Dummy;
-
-
-static llvm::sys::SmartMutex<true>& getMutex() {
- static llvm::sys::SmartMutex<true> HandlesMutex;
- return HandlesMutex;
-}
+static llvm::ManagedStatic<llvm::StringMap<void *> > ExplicitSymbols;
+static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > SymbolsMutex;
void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName,
void *symbolValue) {
- SmartScopedLock<true> lock(getMutex());
- if (ExplicitSymbols == 0)
- ExplicitSymbols = new StringMap<void*>();
+ SmartScopedLock<true> lock(*SymbolsMutex);
(*ExplicitSymbols)[symbolName] = symbolValue;
}
@@ -72,7 +55,7 @@ static DenseSet<void *> *OpenedHandles = 0;
DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
std::string *errMsg) {
- SmartScopedLock<true> lock(getMutex());
+ SmartScopedLock<true> lock(*SymbolsMutex);
void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL);
if (handle == 0) {
@@ -126,10 +109,10 @@ void *SearchForAddressOfSpecialSymbol(const char* symbolName);
}
void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
- SmartScopedLock<true> Lock(getMutex());
+ SmartScopedLock<true> Lock(*SymbolsMutex);
// First check symbols added via AddSymbol().
- if (ExplicitSymbols) {
+ if (ExplicitSymbols.isConstructed()) {
StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
if (i != ExplicitSymbols->end())
@@ -187,3 +170,11 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
}
#endif // LLVM_ON_WIN32
+
+//===----------------------------------------------------------------------===//
+// C API.
+//===----------------------------------------------------------------------===//
+
+LLVMBool LLVMLoadLibraryPermanently(const char* Filename) {
+ return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename);
+}
diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp
index ed17f60..1eefa3e 100644
--- a/lib/Support/Errno.cpp
+++ b/lib/Support/Errno.cpp
@@ -39,28 +39,27 @@ std::string StrError(int errnum) {
char buffer[MaxErrStrLen];
buffer[0] = '\0';
std::string str;
+ if (errnum == 0)
+ return str;
+
#ifdef HAVE_STRERROR_R
// strerror_r is thread-safe.
- if (errnum)
-# if defined(__GLIBC__) && defined(_GNU_SOURCE)
- // glibc defines its own incompatible version of strerror_r
- // which may not use the buffer supplied.
- str = strerror_r(errnum,buffer,MaxErrStrLen-1);
-# else
- strerror_r(errnum,buffer,MaxErrStrLen-1);
- str = buffer;
-# endif
+#if defined(__GLIBC__) && defined(_GNU_SOURCE)
+ // glibc defines its own incompatible version of strerror_r
+ // which may not use the buffer supplied.
+ str = strerror_r(errnum, buffer, MaxErrStrLen - 1);
+#else
+ strerror_r(errnum, buffer, MaxErrStrLen - 1);
+ str = buffer;
+#endif
#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
- if (errnum) {
- strerror_s(buffer, MaxErrStrLen - 1, errnum);
- str = buffer;
- }
+ strerror_s(buffer, MaxErrStrLen - 1, errnum);
+ str = buffer;
#elif defined(HAVE_STRERROR)
// Copy the thread un-safe result of strerror into
// the buffer as fast as possible to minimize impact
// of collision of strerror in multiple threads.
- if (errnum)
- str = strerror(errnum);
+ str = strerror(errnum);
#else
// Strange that this system doesn't even have strerror
// but, oh well, just use a generic message
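A small usage sketch of the unchanged public interface, now that the errnum == 0 early return is hoisted out of the per-platform branches:

#include "llvm/Support/Errno.h"
#include "llvm/Support/raw_ostream.h"
#include <cerrno>

static void reportOpenFailure(const char *Path) {
  // StrError(0) returns an empty string on every platform.
  llvm::errs() << Path << ": " << llvm::sys::StrError(errno) << "\n";
}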
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 9425445..1eafb96 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/Signals.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm-c/Core.h"
#include <cassert>
#include <cstdlib>
@@ -102,3 +103,19 @@ void llvm::llvm_unreachable_internal(const char *msg, const char *file,
LLVM_BUILTIN_UNREACHABLE;
#endif
}
+
+static void bindingsErrorHandler(void *user_data, const std::string& reason,
+ bool gen_crash_diag) {
+ LLVMFatalErrorHandler handler =
+ LLVM_EXTENSION reinterpret_cast<LLVMFatalErrorHandler>(user_data);
+ handler(reason.c_str());
+}
+
+void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) {
+ install_fatal_error_handler(bindingsErrorHandler,
+ LLVM_EXTENSION reinterpret_cast<void *>(Handler));
+}
+
+void LLVMResetFatalErrorHandler() {
+ remove_fatal_error_handler();
+}
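A hypothetical C-API client of the new hooks; per the bindingsErrorHandler trampoline above, the handler receives just the reason string:

#include "llvm-c/Core.h"
#include <stdio.h>
#include <stdlib.h>

static void MyFatalHandler(const char *Reason) {
  fprintf(stderr, "LLVM fatal error: %s\n", Reason);
  exit(1);
}

int main(void) {
  LLVMInstallFatalErrorHandler(MyFatalHandler);
  /* ... use the C API ... */
  LLVMResetFatalErrorHandler();
  return 0;
}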
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 7a9400d..85be415 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -219,5 +219,8 @@ void llvm::DisplayGraph(StringRef FilenameRef, bool wait,
errs() << "Running 'dotty' program... ";
if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg))
return;
+#else
+ (void)Filename;
+ (void)ErrMsg;
#endif
}
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 90e4389..6e9a5c9 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -52,8 +52,54 @@ using namespace llvm;
/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
/// specified arguments. If we can't run cpuid on the host, return true.
-static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__)
+ #if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+// pedantic #else returns to appease -Wunreachable-code (so we don't generate
+// postprocessed code that looks like "return true; return false;")
+ #else
+ return true;
+ #endif
+#elif defined(_MSC_VER)
+ // The MSVC intrinsic is portable across x86 and x64.
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+#else
+ return true;
+#endif
+}
+
+/// GetX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
+/// 4 values in the specified arguments. If we can't run cpuid on the host,
+/// return true.
+bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
#if defined(__GNUC__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
@@ -64,16 +110,22 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
"=S" (*rEBX),
"=c" (*rECX),
"=d" (*rEDX)
- : "a" (value));
+ : "a" (value),
+ "c" (subleaf));
return false;
#elif defined(_MSC_VER)
- int registers[4];
- __cpuid(registers, value);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
+ // __cpuidex was added in MSVC++ 9.0 SP1
+ #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #else
+ return true;
+ #endif
#else
return true;
#endif
@@ -86,11 +138,13 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
"=S" (*rEBX),
"=c" (*rECX),
"=d" (*rEDX)
- : "a" (value));
+ : "a" (value),
+ "c" (subleaf));
return false;
#elif defined(_MSC_VER)
__asm {
mov eax,value
+ mov ecx,subleaf
cpuid
mov esi,rEAX
mov dword ptr [esi],eax
@@ -102,8 +156,6 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
mov dword ptr [esi],edx
}
return false;
-// pedantic #else returns to appease -Wunreachable-code (so we don't generate
-// postprocessed code that looks like "return true; return false;")
#else
return true;
#endif
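The subleaf-aware query reduced to a self-contained feature probe (assumptions: GCC/Clang inline asm on x86-64 only; the function name is made up). Bit 5 of EBX from leaf 7, subleaf 0 is the AVX2 flag that getHostCPUName tests below; the real check additionally requires OS support for saving AVX state:

#include <stdint.h>

static bool hostHasAVX2() {
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
  unsigned EAX, EBX, ECX, EDX;
  // Maximum supported basic leaf comes back in EAX of leaf 0. The rbx
  // save/restore mirrors the asm above, since gcc may use rbx for PIC.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(EAX), "=S"(EBX), "=c"(ECX), "=d"(EDX)
          : "a"(0u));
  if (EAX < 7)
    return false;
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(EAX), "=S"(EBX), "=c"(ECX), "=d"(EDX)
          : "a"(7u), "c"(0u));
  return (EBX >> 5) & 1;
#else
  return false;
#endif
}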
@@ -148,6 +200,14 @@ std::string sys::getHostCPUName() {
unsigned Model = 0;
DetectX86FamilyModel(EAX, Family, Model);
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+
+ unsigned MaxLeaf = EAX;
bool HasSSE3 = (ECX & 0x1);
bool HasSSE41 = (ECX & 0x80000);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
@@ -155,15 +215,12 @@ std::string sys::getHostCPUName() {
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
+ bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 &&
+ !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) &&
+ (EBX & 0x20);
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
- union {
- unsigned u[3];
- char c[12];
- } text;
-
- GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
switch (Family) {
case 3:
@@ -271,10 +328,20 @@ std::string sys::getHostCPUName() {
// Ivy Bridge:
case 58:
+ case 62: // Ivy Bridge EP
// Not all Ivy Bridge processors support AVX (such as the Pentium
// versions instead of the i7 versions).
return HasAVX ? "core-avx-i" : "corei7";
+ // Haswell:
+ case 60:
+ case 63:
+ case 69:
+ case 70:
+ // Like Ivy Bridge, not all Haswell processors support AVX (such as the
+ // Pentium versions instead of the i7 versions).
+ return HasAVX2 ? "core-avx2" : "corei7";
+
case 28: // Most 45 nm Intel Atom processors
case 38: // 45 nm Atom Lincroft
case 39: // 32 nm Atom Medfield
@@ -282,6 +349,12 @@ std::string sys::getHostCPUName() {
case 54: // 32 nm Atom Midview
return "atom";
+ // Atom Silvermont codes from the Intel software optimization guide.
+ case 55:
+ case 74:
+ case 77:
+ return "slm";
+
default: return (Em64T) ? "x86-64" : "i686";
}
case 15: {
@@ -359,9 +432,11 @@ std::string sys::getHostCPUName() {
case 21:
if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback.
return "btver1";
- if (Model > 15 && Model <= 31)
- return "bdver2";
- return "bdver1";
+ if (Model >= 0x30)
+ return "bdver3"; // 30h-3Fh: Steamroller
+ if (Model >= 0x10)
+ return "bdver2"; // 10h-1Fh: Piledriver
+ return "bdver1"; // 00h-0Fh: Bulldozer
case 22:
if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback.
return "btver1";
@@ -546,6 +621,48 @@ std::string sys::getHostCPUName() {
return "generic";
}
+#elif defined(__linux__) && defined(__s390x__)
+std::string sys::getHostCPUName() {
+ // STIDP is a privileged operation, so use /proc/cpuinfo instead.
+ // Note: We cannot mmap /proc/cpuinfo here and then process the resulting
+ // memory buffer because the 'file' has 0 size (it can only be read as a
+ // stream).
+
+ std::string Err;
+ DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err);
+ if (!DS) {
+ DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n");
+ return "generic";
+ }
+
+ // The "processor 0:" line comes after a fair amount of other information,
+ // including a cache breakdown, but this should be plenty.
+ char buffer[2048];
+ size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer));
+ delete DS;
+
+ StringRef Str(buffer, CPUInfoSize);
+ SmallVector<StringRef, 32> Lines;
+ Str.split(Lines, "\n");
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
+ if (Lines[I].startswith("processor ")) {
+ size_t Pos = Lines[I].find("machine = ");
+ if (Pos != StringRef::npos) {
+ Pos += sizeof("machine = ") - 1;
+ unsigned int Id;
+ if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
+ if (Id >= 2827)
+ return "zEC12";
+ if (Id >= 2817)
+ return "z196";
+ }
+ }
+ break;
+ }
+ }
+
+ return "generic";
+}
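The same machine-number lookup written against plain iostreams, assuming the "processor 0: ... machine = NNNN ..." line layout (the patch itself goes through DataStreamer because /proc/cpuinfo reports a zero size):

#include <cstdlib>
#include <fstream>
#include <string>

static std::string s390xHostCPU() {
  std::ifstream In("/proc/cpuinfo");
  std::string Line;
  while (std::getline(In, Line)) {
    if (Line.compare(0, 10, "processor ") != 0)
      continue;
    std::string::size_type Pos = Line.find("machine = ");
    if (Pos == std::string::npos)
      break;
    unsigned Id = std::strtoul(Line.c_str() + Pos + 10, 0, 10);
    if (Id >= 2827) return "zEC12";
    if (Id >= 2817) return "z196";
    break;
  }
  return "generic";
}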
#else
std::string sys::getHostCPUName() {
return "generic";
diff --git a/lib/Support/Locale.cpp b/lib/Support/Locale.cpp
index 17b9b6c..35ddf7f 100644
--- a/lib/Support/Locale.cpp
+++ b/lib/Support/Locale.cpp
@@ -1,10 +1,31 @@
#include "llvm/Support/Locale.h"
-#include "llvm/Config/config.h"
+#include "llvm/Support/Unicode.h"
-#ifdef __APPLE__
-#include "LocaleXlocale.inc"
-#elif LLVM_ON_WIN32
-#include "LocaleWindows.inc"
+namespace llvm {
+namespace sys {
+namespace locale {
+
+int columnWidth(StringRef Text) {
+#if LLVM_ON_WIN32
+ return Text.size();
#else
-#include "LocaleGeneric.inc"
+ return llvm::sys::unicode::columnWidthUTF8(Text);
#endif
+}
+
+bool isPrint(int UCS) {
+#if LLVM_ON_WIN32
+ // Restrict characters that we'll try to print to the lower part of ASCII,
+ // excluding the control characters (i.e. 0x20 - 0x7E). In general one cannot
+ // reliably output code points U+0080 and higher using narrow character C/C++
+ // output functions in Windows, because the meaning of the upper 128 codes is
+ // determined by the active code page in the console.
+ return ' ' <= UCS && UCS <= '~';
+#else
+ return llvm::sys::unicode::isPrintable(UCS);
+#endif
+}
+
+} // namespace locale
+} // namespace sys
+} // namespace llvm
diff --git a/lib/Support/LocaleGeneric.inc b/lib/Support/LocaleGeneric.inc
deleted file mode 100644
index 3a939b8..0000000
--- a/lib/Support/LocaleGeneric.inc
+++ /dev/null
@@ -1,382 +0,0 @@
-//===- llvm/Support/LocaleGeneric.inc - Locale-dependent stuff -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements llvm::sys::locale::columnWidth and
-// llvm::sys::locale::isPrint functions for UTF-8 locales.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/ConvertUTF.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/UnicodeCharRanges.h"
-
-namespace llvm {
-namespace sys {
-namespace locale {
-
-enum ColumnWidthErrors {
- ErrorInvalidUTF8 = -2,
- ErrorNonPrintableCharacter = -1
-};
-
-/// Determines if a character is likely to be displayed correctly on the
-/// terminal. Exact implementation would have to depend on the specific
-/// terminal, so we define the semantic that should be suitable for generic case
-/// of a terminal capable to output Unicode characters.
-/// All characters from the Unicode codepoint range are considered printable
-/// except for:
-/// * C0 and C1 control character ranges;
-/// * default ignorable code points as per 5.21 of
-/// http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
-/// * format characters (category = Cf);
-/// * surrogates (category = Cs);
-/// * unassigned characters (category = Cn).
-/// \return true if the character is considered printable.
-bool isPrint(int UCS) {
- // Sorted list of non-overlapping intervals of code points that are not
- // supposed to be printable.
- static const UnicodeCharRange NonPrintableRanges[] = {
- { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x00AD, 0x00AD },
- { 0x034F, 0x034F }, { 0x0378, 0x0379 }, { 0x037F, 0x0383 },
- { 0x038B, 0x038B }, { 0x038D, 0x038D }, { 0x03A2, 0x03A2 },
- { 0x0528, 0x0530 }, { 0x0557, 0x0558 }, { 0x0560, 0x0560 },
- { 0x0588, 0x0588 }, { 0x058B, 0x058E }, { 0x0590, 0x0590 },
- { 0x05C8, 0x05CF }, { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 },
- { 0x061C, 0x061D }, { 0x06DD, 0x06DD }, { 0x070E, 0x070F },
- { 0x074B, 0x074C }, { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF },
- { 0x082E, 0x082F }, { 0x083F, 0x083F }, { 0x085C, 0x085D },
- { 0x085F, 0x089F }, { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 },
- { 0x08FF, 0x08FF }, { 0x0978, 0x0978 }, { 0x0980, 0x0980 },
- { 0x0984, 0x0984 }, { 0x098D, 0x098E }, { 0x0991, 0x0992 },
- { 0x09A9, 0x09A9 }, { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 },
- { 0x09BA, 0x09BB }, { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA },
- { 0x09CF, 0x09D6 }, { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE },
- { 0x09E4, 0x09E5 }, { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 },
- { 0x0A0B, 0x0A0E }, { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 },
- { 0x0A31, 0x0A31 }, { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 },
- { 0x0A3A, 0x0A3B }, { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 },
- { 0x0A49, 0x0A4A }, { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 },
- { 0x0A5D, 0x0A5D }, { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 },
- { 0x0A84, 0x0A84 }, { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 },
- { 0x0AA9, 0x0AA9 }, { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 },
- { 0x0ABA, 0x0ABB }, { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA },
- { 0x0ACE, 0x0ACF }, { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 },
- { 0x0AF2, 0x0B00 }, { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E },
- { 0x0B11, 0x0B12 }, { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 },
- { 0x0B34, 0x0B34 }, { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 },
- { 0x0B49, 0x0B4A }, { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B },
- { 0x0B5E, 0x0B5E }, { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 },
- { 0x0B84, 0x0B84 }, { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 },
- { 0x0B96, 0x0B98 }, { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D },
- { 0x0BA0, 0x0BA2 }, { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD },
- { 0x0BBA, 0x0BBD }, { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 },
- { 0x0BCE, 0x0BCF }, { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 },
- { 0x0BFB, 0x0C00 }, { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D },
- { 0x0C11, 0x0C11 }, { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 },
- { 0x0C3A, 0x0C3C }, { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 },
- { 0x0C4E, 0x0C54 }, { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F },
- { 0x0C64, 0x0C65 }, { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 },
- { 0x0C84, 0x0C84 }, { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 },
- { 0x0CA9, 0x0CA9 }, { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB },
- { 0x0CC5, 0x0CC5 }, { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 },
- { 0x0CD7, 0x0CDD }, { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 },
- { 0x0CF0, 0x0CF0 }, { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 },
- { 0x0D0D, 0x0D0D }, { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C },
- { 0x0D45, 0x0D45 }, { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 },
- { 0x0D58, 0x0D5F }, { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 },
- { 0x0D80, 0x0D81 }, { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 },
- { 0x0DB2, 0x0DB2 }, { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF },
- { 0x0DC7, 0x0DC9 }, { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 },
- { 0x0DD7, 0x0DD7 }, { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 },
- { 0x0E3B, 0x0E3E }, { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 },
- { 0x0E85, 0x0E86 }, { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C },
- { 0x0E8E, 0x0E93 }, { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 },
- { 0x0EA4, 0x0EA4 }, { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 },
- { 0x0EAC, 0x0EAC }, { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF },
- { 0x0EC5, 0x0EC5 }, { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF },
- { 0x0EDA, 0x0EDB }, { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 },
- { 0x0F6D, 0x0F70 }, { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD },
- { 0x0FCD, 0x0FCD }, { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 },
- { 0x10C8, 0x10CC }, { 0x10CE, 0x10CF }, { 0x115F, 0x1160 },
- { 0x1249, 0x1249 }, { 0x124E, 0x124F }, { 0x1257, 0x1257 },
- { 0x1259, 0x1259 }, { 0x125E, 0x125F }, { 0x1289, 0x1289 },
- { 0x128E, 0x128F }, { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 },
- { 0x12BF, 0x12BF }, { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 },
- { 0x12D7, 0x12D7 }, { 0x1311, 0x1311 }, { 0x1316, 0x1317 },
- { 0x135B, 0x135C }, { 0x137D, 0x137F }, { 0x139A, 0x139F },
- { 0x13F5, 0x13FF }, { 0x169D, 0x169F }, { 0x16F1, 0x16FF },
- { 0x170D, 0x170D }, { 0x1715, 0x171F }, { 0x1737, 0x173F },
- { 0x1754, 0x175F }, { 0x176D, 0x176D }, { 0x1771, 0x1771 },
- { 0x1774, 0x177F }, { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF },
- { 0x17EA, 0x17EF }, { 0x17FA, 0x17FF }, { 0x180B, 0x180D },
- { 0x180F, 0x180F }, { 0x181A, 0x181F }, { 0x1878, 0x187F },
- { 0x18AB, 0x18AF }, { 0x18F6, 0x18FF }, { 0x191D, 0x191F },
- { 0x192C, 0x192F }, { 0x193C, 0x193F }, { 0x1941, 0x1943 },
- { 0x196E, 0x196F }, { 0x1975, 0x197F }, { 0x19AC, 0x19AF },
- { 0x19CA, 0x19CF }, { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D },
- { 0x1A5F, 0x1A5F }, { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F },
- { 0x1A9A, 0x1A9F }, { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F },
- { 0x1B7D, 0x1B7F }, { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A },
- { 0x1C4A, 0x1C4C }, { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF },
- { 0x1CF7, 0x1CFF }, { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 },
- { 0x1F1E, 0x1F1F }, { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F },
- { 0x1F58, 0x1F58 }, { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C },
- { 0x1F5E, 0x1F5E }, { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 },
- { 0x1FC5, 0x1FC5 }, { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC },
- { 0x1FF0, 0x1FF1 }, { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF },
- { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x206F },
- { 0x2072, 0x2073 }, { 0x208F, 0x208F }, { 0x209D, 0x209F },
- { 0x20BB, 0x20CF }, { 0x20F1, 0x20FF }, { 0x218A, 0x218F },
- { 0x23F4, 0x23FF }, { 0x2427, 0x243F }, { 0x244B, 0x245F },
- { 0x2700, 0x2700 }, { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF },
- { 0x2C2F, 0x2C2F }, { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 },
- { 0x2D26, 0x2D26 }, { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F },
- { 0x2D68, 0x2D6E }, { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F },
- { 0x2DA7, 0x2DA7 }, { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 },
- { 0x2DBF, 0x2DBF }, { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF },
- { 0x2DD7, 0x2DD7 }, { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F },
- { 0x2E9A, 0x2E9A }, { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF },
- { 0x2FFC, 0x2FFF }, { 0x3040, 0x3040 }, { 0x3097, 0x3098 },
- { 0x3100, 0x3104 }, { 0x312E, 0x3130 }, { 0x3164, 0x3164 },
- { 0x318F, 0x318F }, { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF },
- { 0x321F, 0x321F }, { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF },
- { 0x9FCD, 0x9FFF }, { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF },
- { 0xA62C, 0xA63F }, { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF },
- { 0xA78F, 0xA78F }, { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 },
- { 0xA82C, 0xA82F }, { 0xA83A, 0xA83F }, { 0xA878, 0xA87F },
- { 0xA8C5, 0xA8CD }, { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF },
- { 0xA954, 0xA95E }, { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE },
- { 0xA9DA, 0xA9DD }, { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F },
- { 0xAA4E, 0xAA4F }, { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F },
- { 0xAAC3, 0xAADA }, { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 },
- { 0xAB0F, 0xAB10 }, { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 },
- { 0xAB2F, 0xABBF }, { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF },
- { 0xD7A4, 0xD7AF }, { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF },
- { 0xFA6E, 0xFA6F }, { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 },
- { 0xFB18, 0xFB1C }, { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D },
- { 0xFB3F, 0xFB3F }, { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 },
- { 0xFBC2, 0xFBD2 }, { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 },
- { 0xFDC8, 0xFDEF }, { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F },
- { 0xFE27, 0xFE2F }, { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 },
- { 0xFE6C, 0xFE6F }, { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF },
- { 0xFF00, 0xFF00 }, { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 },
- { 0xFFC8, 0xFFC9 }, { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 },
- { 0xFFDD, 0xFFDF }, { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB },
- { 0xFFFE, 0xFFFF }, { 0x1000C, 0x1000C }, { 0x10027, 0x10027 },
- { 0x1003B, 0x1003B }, { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F },
- { 0x1005E, 0x1007F }, { 0x100FB, 0x100FF }, { 0x10103, 0x10106 },
- { 0x10134, 0x10136 }, { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF },
- { 0x101FE, 0x1027F }, { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF },
- { 0x1031F, 0x1031F }, { 0x10324, 0x1032F }, { 0x1034B, 0x1037F },
- { 0x1039E, 0x1039E }, { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF },
- { 0x1049E, 0x1049F }, { 0x104AA, 0x107FF }, { 0x10806, 0x10807 },
- { 0x10809, 0x10809 }, { 0x10836, 0x10836 }, { 0x10839, 0x1083B },
- { 0x1083D, 0x1083E }, { 0x10856, 0x10856 }, { 0x10860, 0x108FF },
- { 0x1091C, 0x1091E }, { 0x1093A, 0x1093E }, { 0x10940, 0x1097F },
- { 0x109B8, 0x109BD }, { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 },
- { 0x10A07, 0x10A0B }, { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 },
- { 0x10A34, 0x10A37 }, { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F },
- { 0x10A59, 0x10A5F }, { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 },
- { 0x10B56, 0x10B57 }, { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF },
- { 0x10C49, 0x10E5F }, { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 },
- { 0x11070, 0x1107F }, { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF },
- { 0x110E9, 0x110EF }, { 0x110FA, 0x110FF }, { 0x11135, 0x11135 },
- { 0x11144, 0x1117F }, { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F },
- { 0x116B8, 0x116BF }, { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF },
- { 0x12463, 0x1246F }, { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF },
- { 0x16A39, 0x16EFF }, { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E },
- { 0x16FA0, 0x1AFFF }, { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF },
- { 0x1D127, 0x1D128 }, { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF },
- { 0x1D246, 0x1D2FF }, { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF },
- { 0x1D455, 0x1D455 }, { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 },
- { 0x1D4A3, 0x1D4A4 }, { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD },
- { 0x1D4BA, 0x1D4BA }, { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 },
- { 0x1D506, 0x1D506 }, { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 },
- { 0x1D51D, 0x1D51D }, { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F },
- { 0x1D545, 0x1D545 }, { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 },
- { 0x1D6A6, 0x1D6A7 }, { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF },
- { 0x1EE04, 0x1EE04 }, { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 },
- { 0x1EE25, 0x1EE26 }, { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 },
- { 0x1EE38, 0x1EE38 }, { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 },
- { 0x1EE43, 0x1EE46 }, { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A },
- { 0x1EE4C, 0x1EE4C }, { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 },
- { 0x1EE55, 0x1EE56 }, { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A },
- { 0x1EE5C, 0x1EE5C }, { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 },
- { 0x1EE63, 0x1EE63 }, { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B },
- { 0x1EE73, 0x1EE73 }, { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D },
- { 0x1EE7F, 0x1EE7F }, { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 },
- { 0x1EEA4, 0x1EEA4 }, { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF },
- { 0x1EEF2, 0x1EFFF }, { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F },
- { 0x1F0AF, 0x1F0B0 }, { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 },
- { 0x1F0E0, 0x1F0FF }, { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F },
- { 0x1F16C, 0x1F16F }, { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F },
- { 0x1F23B, 0x1F23F }, { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF },
- { 0x1F321, 0x1F32F }, { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F },
- { 0x1F394, 0x1F39F }, { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF },
- { 0x1F3F1, 0x1F3FF }, { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 },
- { 0x1F4F8, 0x1F4F8 }, { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F },
- { 0x1F544, 0x1F54F }, { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 },
- { 0x1F650, 0x1F67F }, { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF },
- { 0x2A6D7, 0x2A6FF }, { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF },
- { 0x2FA1E, 0xF0000 }, { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF }
- };
-
- return UCS >= 0 && UCS <= 0x10FFFF && !isCharInSet(UCS, NonPrintableRanges);
-}
-
-/// Gets the number of positions a character is likely to occupy when output
-/// on a terminal ("character width"). This depends on the implementation of the
-/// terminal, and there's no standard definition of character width.
-/// The implementation defines it in a way that is expected to be compatible
-/// with a generic Unicode-capable terminal.
-/// \return Character width:
-/// * ErrorNonPrintableCharacter (-1) for non-printable characters (as
-/// identified by isPrint);
-/// * 0 for non-spacing and enclosing combining marks;
-/// * 2 for CJK characters excluding halfwidth forms;
-/// * 1 for all remaining characters.
-static inline int charWidth(int UCS)
-{
- if (!isPrint(UCS))
- return ErrorNonPrintableCharacter;
-
- // Sorted list of non-spacing and enclosing combining mark intervals as
- // defined in "3.6 Combination" of
- // http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
- static const UnicodeCharRange CombiningCharacters[] = {
- { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD },
- { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 },
- { 0x05C7, 0x05C7 }, { 0x0610, 0x061A }, { 0x064B, 0x065F },
- { 0x0670, 0x0670 }, { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 },
- { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x0711, 0x0711 },
- { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 },
- { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 },
- { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08E4, 0x08FE },
- { 0x0900, 0x0902 }, { 0x093A, 0x093A }, { 0x093C, 0x093C },
- { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0957 },
- { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC },
- { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 },
- { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 },
- { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 },
- { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 },
- { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
- { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 },
- { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 },
- { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B62, 0x0B63 },
- { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD },
- { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D },
- { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 }, { 0x0CBC, 0x0CBC },
- { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
- { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D },
- { 0x0D62, 0x0D63 }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
- { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
- { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
- { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
- { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
- { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
- { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
- { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A },
- { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 },
- { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 },
- { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x135D, 0x135F },
- { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
- { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
- { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
- { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
- { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
- { 0x1A17, 0x1A18 }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E },
- { 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C },
- { 0x1A73, 0x1A7C }, { 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 },
- { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C },
- { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 },
- { 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAB },
- { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED },
- { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 },
- { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 },
- { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1DC0, 0x1DE6 },
- { 0x1DFC, 0x1DFF }, { 0x20D0, 0x20F0 }, { 0x2CEF, 0x2CF1 },
- { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D },
- { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D },
- { 0xA69F, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 },
- { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 },
- { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 }, { 0xA926, 0xA92D },
- { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 },
- { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC }, { 0xAA29, 0xAA2E },
- { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 },
- { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 },
- { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 },
- { 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 }, { 0xABE5, 0xABE5 },
- { 0xABE8, 0xABE8 }, { 0xABED, 0xABED }, { 0xFB1E, 0xFB1E },
- { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 }, { 0x101FD, 0x101FD },
- { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
- { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x11001, 0x11001 },
- { 0x11038, 0x11046 }, { 0x11080, 0x11081 }, { 0x110B3, 0x110B6 },
- { 0x110B9, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x1112B },
- { 0x1112D, 0x11134 }, { 0x11180, 0x11181 }, { 0x111B6, 0x111BE },
- { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD }, { 0x116B0, 0x116B5 },
- { 0x116B7, 0x116B7 }, { 0x16F8F, 0x16F92 }, { 0x1D167, 0x1D169 },
- { 0x1D17B, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
- { 0x1D242, 0x1D244 }, { 0xE0100, 0xE01EF },
- };
-
- if (isCharInSet(UCS, CombiningCharacters))
- return 0;
-
- static const UnicodeCharRange DoubleWidthCharacters[] = {
- // Hangul Jamo
- { 0x1100, 0x11FF },
- // Deprecated fullwidth angle brackets
- { 0x2329, 0x232A },
- // CJK Misc, CJK Unified Ideographs, Yijing Hexagrams, Yi
- // excluding U+303F (IDEOGRAPHIC HALF FILL SPACE)
- { 0x2E80, 0x303E }, { 0x3040, 0xA4CF },
- // Hangul
- { 0xAC00, 0xD7A3 }, { 0xD7B0, 0xD7C6 }, { 0xD7CB, 0xD7FB },
- // CJK Unified Ideographs
- { 0xF900, 0xFAFF },
- // Vertical forms
- { 0xFE10, 0xFE19 },
- // CJK Compatibility Forms + Small Form Variants
- { 0xFE30, 0xFE6F },
- // Fullwidth forms
- { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 },
- // CJK Unified Ideographs
- { 0x20000, 0x2A6DF }, { 0x2A700, 0x2B81F }, { 0x2F800, 0x2FA1F }
- };
-
- if (isCharInSet(UCS, DoubleWidthCharacters))
- return 2;
- return 1;
-}
-
-int columnWidth(StringRef Text) {
- unsigned ColumnWidth = 0;
- unsigned Length;
- for (size_t i = 0, e = Text.size(); i < e; i += Length) {
- Length = getNumBytesForUTF8(Text[i]);
- if (Length <= 0 || i + Length > Text.size())
- return ErrorInvalidUTF8;
- UTF32 buf[1];
- const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i);
- UTF32 *Target = &buf[0];
- if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target,
- Target + 1, strictConversion))
- return ErrorInvalidUTF8;
- int Width = charWidth(buf[0]);
- if (Width < 0)
- return ErrorNonPrintableCharacter;
- ColumnWidth += Width;
- }
- return ColumnWidth;
-}
-
-}
-}
-}
diff --git a/lib/Support/LocaleWindows.inc b/lib/Support/LocaleWindows.inc
deleted file mode 100644
index 28e429c..0000000
--- a/lib/Support/LocaleWindows.inc
+++ /dev/null
@@ -1,15 +0,0 @@
-namespace llvm {
-namespace sys {
-namespace locale {
-
-int columnWidth(StringRef s) {
- return s.size();
-}
-
-bool isPrint(int c) {
- return ' ' <= c && c <= '~';
-}
-
-}
-}
-}
diff --git a/lib/Support/LocaleXlocale.inc b/lib/Support/LocaleXlocale.inc
deleted file mode 100644
index 389fe3d..0000000
--- a/lib/Support/LocaleXlocale.inc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ManagedStatic.h"
-#include <cassert>
-#include <xlocale.h>
-
-
-namespace {
- struct locale_holder {
- locale_holder()
- : l(newlocale(LC_CTYPE_MASK,"en_US.UTF-8",LC_GLOBAL_LOCALE))
- {
- assert(NULL!=l);
- }
- ~locale_holder() {
- freelocale(l);
- }
-
- int mbswidth(llvm::SmallString<16> s) const {
- // this implementation assumes no '\0' in s
- assert(s.size()==strlen(s.c_str()));
-
- size_t size = mbstowcs_l(NULL,s.c_str(),0,l);
- assert(size!=(size_t)-1);
- if (size==0)
- return 0;
- llvm::SmallVector<wchar_t,200> ws(size);
- size = mbstowcs_l(&ws[0],s.c_str(),ws.size(),l);
- assert(ws.size()==size);
- return wcswidth_l(&ws[0],ws.size(),l);
- }
-
- int isprint(int c) const {
- return iswprint_l(c,l);
- }
-
- private:
-
- locale_t l;
- };
-
- llvm::ManagedStatic<locale_holder> l;
-}
-
-namespace llvm {
-namespace sys {
-namespace locale {
-
-int columnWidth(StringRef s) {
- int width = l->mbswidth(s);
- assert(width>=0);
- return width;
-}
-
-bool isPrint(int c) {
- return l->isprint(c);
-}
-
-}
-}
-}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index cab45c7..dcd5529 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -177,7 +177,7 @@ error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
//===----------------------------------------------------------------------===//
namespace {
-/// \brief Memorry maps a file descriptor using sys::fs::mapped_file_region.
+/// \brief Memory maps a file descriptor using sys::fs::mapped_file_region.
///
/// This handles converting the offset into a legal offset on the platform.
class MemoryBufferMMapFile : public MemoryBuffer {
@@ -217,7 +217,7 @@ public:
};
}
-static error_code getMemoryBufferForStream(int FD,
+static error_code getMemoryBufferForStream(int FD,
StringRef BufferName,
OwningPtr<MemoryBuffer> &result) {
const ssize_t ChunkSize = 4096*4;
@@ -238,14 +238,19 @@ static error_code getMemoryBufferForStream(int FD,
return error_code::success();
}
-error_code MemoryBuffer::getFile(StringRef Filename,
+static error_code getFileAux(const char *Filename,
+ OwningPtr<MemoryBuffer> &result, int64_t FileSize,
+ bool RequiresNullTerminator);
+
+error_code MemoryBuffer::getFile(Twine Filename,
OwningPtr<MemoryBuffer> &result,
int64_t FileSize,
bool RequiresNullTerminator) {
// Ensure the path is null terminated.
- SmallString<256> PathBuf(Filename.begin(), Filename.end());
- return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize,
- RequiresNullTerminator);
+ SmallString<256> PathBuf;
+ StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf);
+ return getFileAux(NullTerminatedName.data(), result, FileSize,
+ RequiresNullTerminator);
}
static error_code getOpenFileImpl(int FD, const char *Filename,
@@ -253,10 +258,9 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
uint64_t FileSize, uint64_t MapSize,
int64_t Offset, bool RequiresNullTerminator);
-error_code MemoryBuffer::getFile(const char *Filename,
- OwningPtr<MemoryBuffer> &result,
- int64_t FileSize,
- bool RequiresNullTerminator) {
+static error_code getFileAux(const char *Filename,
+ OwningPtr<MemoryBuffer> &result, int64_t FileSize,
+ bool RequiresNullTerminator) {
int FD;
error_code EC = sys::fs::openFileForRead(Filename, FD);
if (EC)
@@ -276,7 +280,7 @@ static bool shouldUseMmap(int FD,
int PageSize) {
// We don't use mmap for small files because this can severely fragment our
// address space.
- if (MapSize < 4096*4)
+ if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
return false;
if (!RequiresNullTerminator)
@@ -302,6 +306,15 @@ static bool shouldUseMmap(int FD,
if (End != FileSize)
return false;
+#if defined(_WIN32) || defined(__CYGWIN__)
+ // Don't peek at the next page if the file size is a multiple of the
+ // *physical* page size (4k) but not a multiple of the AllocationGranularity
+ // (64k), when a null terminator is required.
+ // FIXME: It's not good to hardcode 4096 here. dwPageSize shows 4096.
+ if ((FileSize & (4096 - 1)) == 0)
+ return false;
+#endif
+
// Don't try to map files that are exactly a multiple of the system page size
// if we need a null terminator.
if ((FileSize & (PageSize -1)) == 0)
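
The hunk above tightens the heuristic for when a file is worth memory-mapping: very small mappings, and mappings that would end exactly on a page boundary when a null terminator is needed, fall back to a plain read. A minimal standalone sketch of that decision follows; the function name and parameters are illustrative, not the exact LLVM signature.

    #include <cstdint>

    // Decide whether to mmap a file or read it into an allocated buffer.
    // Sketch only: thresholds mirror the patch above (at least 4 pages, and
    // no mapping that ends exactly on a page boundary when a terminating
    // NUL must be readable one byte past the end).
    static bool shouldUseMmapSketch(uint64_t FileSize, uint64_t MapSize,
                                    unsigned PageSize,
                                    bool RequiresNullTerminator) {
      // Small mappings fragment the address space for little benefit.
      if (MapSize < 4 * 4096 || MapSize < PageSize)
        return false;

      if (!RequiresNullTerminator)
        return true;

      // If the file ends exactly on a page boundary, the byte just past the
      // mapping is not guaranteed to be readable (or zero), so mmap cannot
      // be relied on to provide the NUL terminator.
      if ((FileSize & (PageSize - 1)) == 0)
        return false;

      return true;
    }
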
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index cfd9ed6..c869b30 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -77,7 +77,7 @@ namespace {
return path.substr(0, 1);
// * {file,directory}name
- size_t end = path.find_first_of(separators, 2);
+ size_t end = path.find_first_of(separators);
return path.substr(0, end);
}
@@ -449,23 +449,18 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
}
void native(const Twine &path, SmallVectorImpl<char> &result) {
+ assert((!path.isSingleStringRef() ||
+ path.getSingleStringRef().data() != result.data()) &&
+ "path and result are not allowed to overlap!");
// Clear result.
result.clear();
-#ifdef LLVM_ON_WIN32
- SmallString<128> path_storage;
- StringRef p = path.toStringRef(path_storage);
- result.reserve(p.size());
- for (StringRef::const_iterator i = p.begin(),
- e = p.end();
- i != e;
- ++i) {
- if (*i == '/')
- result.push_back('\\');
- else
- result.push_back(*i);
- }
-#else
path.toVector(result);
+ native(result);
+}
+
+void native(SmallVectorImpl<char> &path) {
+#ifdef LLVM_ON_WIN32
+ std::replace(path.begin(), path.end(), '/', '\\');
#endif
}
@@ -852,6 +847,21 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
if (Magic.size() < 4)
return file_magic::unknown;
switch ((unsigned char)Magic[0]) {
+ case 0x00: {
+ // COFF short import library file
+ if (Magic[1] == (char)0x00 && Magic[2] == (char)0xff &&
+ Magic[3] == (char)0xff)
+ return file_magic::coff_import_library;
+ // Windows resource file
+ const char Expected[] = { 0, 0, 0, 0, '\x20', 0, 0, 0, '\xff' };
+ if (Magic.size() >= sizeof(Expected) &&
+ memcmp(Magic.data(), Expected, sizeof(Expected)) == 0)
+ return file_magic::windows_resource;
+ // 0x0000 = COFF unknown machine type
+ if (Magic[1] == 0)
+ return file_magic::coff_object;
+ break;
+ }
   case 0xDE:  // 0x0B17C0DE = BC wrapper
if (Magic[1] == (char)0xC0 && Magic[2] == (char)0x17 &&
Magic[3] == (char)0x0B)
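
The new `case 0x00` above distinguishes three formats that all begin with a zero byte. A small standalone sketch of the same sniffing logic over a raw byte buffer; the enum names here are illustrative, not LLVM's `file_magic` values.

    #include <cstddef>
    #include <cstring>

    enum class Kind { Unknown, CoffImportLibrary, WindowsResource, CoffObject };

    // Classify a buffer whose first byte is 0x00, following the patch above.
    static Kind sniffZeroPrefixed(const unsigned char *Buf, size_t Len) {
      if (Len < 4 || Buf[0] != 0x00)
        return Kind::Unknown;
      // COFF short import library: 00 00 FF FF.
      if (Buf[1] == 0x00 && Buf[2] == 0xff && Buf[3] == 0xff)
        return Kind::CoffImportLibrary;
      // Windows .res file: starts with a "null resource" header
      // 00 00 00 00 20 00 00 00 FF ...
      static const unsigned char Res[] = {0, 0, 0, 0, 0x20, 0, 0, 0, 0xff};
      if (Len >= sizeof(Res) && std::memcmp(Buf, Res, sizeof(Res)) == 0)
        return Kind::WindowsResource;
      // 0x0000 is the COFF "unknown machine" type, so treat it as a COFF object.
      if (Buf[1] == 0x00)
        return Kind::CoffObject;
      return Kind::Unknown;
    }
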
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 23ee5ab..722f4ca 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -15,10 +15,12 @@
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadLocal.h"
#include "llvm/Support/Watchdog.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm-c/Core.h"
#ifdef HAVE_CRASHREPORTERCLIENT_H
#include <CrashReporterClient.h>
@@ -26,12 +28,7 @@
using namespace llvm;
-namespace llvm {
- bool DisablePrettyStackTrace = false;
-}
-
-// FIXME: This should be thread local when llvm supports threads.
-static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead;
+static ManagedStatic<sys::ThreadLocal<const PrettyStackTraceEntry> > PrettyStackTraceHead;
static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
unsigned NextID = 0;
@@ -49,12 +46,12 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
/// PrintCurStackTrace - Print the current stack trace to the specified stream.
static void PrintCurStackTrace(raw_ostream &OS) {
// Don't print an empty trace.
- if (PrettyStackTraceHead.get() == 0) return;
+ if (PrettyStackTraceHead->get() == 0) return;
// If there are pretty stack frames registered, walk and emit them.
OS << "Stack dump:\n";
- PrintStack(PrettyStackTraceHead.get(), OS);
+ PrintStack(PrettyStackTraceHead->get(), OS);
OS.flush();
}
@@ -102,26 +99,28 @@ static void CrashHandler(void *) {
#endif
}
-static bool RegisterCrashPrinter() {
- if (!DisablePrettyStackTrace)
- sys::AddSignalHandler(CrashHandler, 0);
- return false;
-}
-
PrettyStackTraceEntry::PrettyStackTraceEntry() {
- // The first time this is called, we register the crash printer.
- static bool HandlerRegistered = RegisterCrashPrinter();
- (void)HandlerRegistered;
-
// Link ourselves.
- NextEntry = PrettyStackTraceHead.get();
- PrettyStackTraceHead.set(this);
+ NextEntry = PrettyStackTraceHead->get();
+ PrettyStackTraceHead->set(this);
}
PrettyStackTraceEntry::~PrettyStackTraceEntry() {
- assert(PrettyStackTraceHead.get() == this &&
+ // Do nothing if PrettyStackTraceHead is uninitialized. This can only happen
+ // if a shutdown occurred after we created the PrettyStackTraceEntry. That
+ // does occur in the following idiom:
+ //
+ // PrettyStackTraceProgram X(...);
+ // llvm_shutdown_obj Y;
+ //
+ // Without this check, we may end up removing ourselves from the stack trace
+ // after PrettyStackTraceHead has already been destroyed.
+ if (!PrettyStackTraceHead.isConstructed())
+ return;
+
+ assert(PrettyStackTraceHead->get() == this &&
"Pretty stack trace entry destruction is out of order");
- PrettyStackTraceHead.set(getNextEntry());
+ PrettyStackTraceHead->set(getNextEntry());
}
void PrettyStackTraceString::print(raw_ostream &OS) const {
@@ -135,3 +134,18 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const {
OS << ArgV[i] << ' ';
OS << '\n';
}
+
+static bool RegisterCrashPrinter() {
+ sys::AddSignalHandler(CrashHandler, 0);
+ return false;
+}
+
+void llvm::EnablePrettyStackTrace() {
+ // The first time this is called, we register the crash printer.
+ static bool HandlerRegistered = RegisterCrashPrinter();
+ (void)HandlerRegistered;
+}
+
+void LLVMEnablePrettyStackTrace() {
+ EnablePrettyStackTrace();
+}
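
The patch above replaces a plain static head pointer with a ManagedStatic thread-local and moves crash-printer registration behind an explicit EnablePrettyStackTrace() call. The underlying idea is an intrusive, per-thread stack of scope entries that a crash handler can walk. A minimal sketch of that idea using plain C++11 thread_local (not LLVM's ManagedStatic/ThreadLocal machinery):

    #include <cstdio>

    class ScopedTraceEntry {
      const char *Msg;
      ScopedTraceEntry *Next;          // link to the entry pushed before us
      static thread_local ScopedTraceEntry *Head;

    public:
      explicit ScopedTraceEntry(const char *M) : Msg(M), Next(Head) {
        Head = this;                   // push in the constructor
      }
      ~ScopedTraceEntry() {
        Head = Next;                   // pop in the destructor (LIFO order)
      }
      // Called from a crash handler: walk and print the current stack.
      static void dump() {
        for (ScopedTraceEntry *E = Head; E; E = E->Next)
          std::fprintf(stderr, "\t%s\n", E->Msg);
      }
    };

    thread_local ScopedTraceEntry *ScopedTraceEntry::Head = nullptr;

    int main() {
      ScopedTraceEntry A("parsing input");
      {
        ScopedTraceEntry B("lowering function 'f'");
        ScopedTraceEntry::dump();      // prints both entries, innermost first
      }
      ScopedTraceEntry::dump();        // only "parsing input" remains
    }
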
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
index 2c0d37b..d5168f0 100644
--- a/lib/Support/Process.cpp
+++ b/lib/Support/Process.cpp
@@ -80,6 +80,24 @@ TimeValue self_process::get_wall_time() const {
#endif
+#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
+
+#define ALLCOLORS(FGBG,BOLD) {\
+ COLOR(FGBG, "0", BOLD),\
+ COLOR(FGBG, "1", BOLD),\
+ COLOR(FGBG, "2", BOLD),\
+ COLOR(FGBG, "3", BOLD),\
+ COLOR(FGBG, "4", BOLD),\
+ COLOR(FGBG, "5", BOLD),\
+ COLOR(FGBG, "6", BOLD),\
+ COLOR(FGBG, "7", BOLD)\
+ }
+
+static const char colorcodes[2][2][8][10] = {
+ { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
+ { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
+};
+
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Process.inc"
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 79f7e5f..83f2ec4 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -22,30 +22,40 @@ using namespace sys;
//=== independent code.
//===----------------------------------------------------------------------===//
-static bool Execute(void **Data, StringRef Program, const char **args,
+static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
const char **env, const StringRef **Redirects,
unsigned memoryLimit, std::string *ErrMsg);
-static int Wait(void *&Data, StringRef Program, unsigned secondsToWait,
- std::string *ErrMsg);
-
int sys::ExecuteAndWait(StringRef Program, const char **args, const char **envp,
const StringRef **redirects, unsigned secondsToWait,
unsigned memoryLimit, std::string *ErrMsg,
bool *ExecutionFailed) {
- void *Data = 0;
- if (Execute(&Data, Program, args, envp, redirects, memoryLimit, ErrMsg)) {
- if (ExecutionFailed) *ExecutionFailed = false;
- return Wait(Data, Program, secondsToWait, ErrMsg);
+ ProcessInfo PI;
+ if (Execute(PI, Program, args, envp, redirects, memoryLimit, ErrMsg)) {
+ if (ExecutionFailed)
+ *ExecutionFailed = false;
+ ProcessInfo Result = Wait(PI, secondsToWait, true, ErrMsg);
+ return Result.ReturnCode;
}
- if (ExecutionFailed) *ExecutionFailed = true;
+
+ if (ExecutionFailed)
+ *ExecutionFailed = true;
+
return -1;
}
-void sys::ExecuteNoWait(StringRef Program, const char **args, const char **envp,
- const StringRef **redirects, unsigned memoryLimit,
- std::string *ErrMsg) {
- Execute(/*Data*/ 0, Program, args, envp, redirects, memoryLimit, ErrMsg);
+ProcessInfo sys::ExecuteNoWait(StringRef Program, const char **args,
+ const char **envp, const StringRef **redirects,
+ unsigned memoryLimit, std::string *ErrMsg,
+ bool *ExecutionFailed) {
+ ProcessInfo PI;
+ if (ExecutionFailed)
+ *ExecutionFailed = false;
+ if (!Execute(PI, Program, args, envp, redirects, memoryLimit, ErrMsg))
+ if (ExecutionFailed)
+ *ExecutionFailed = true;
+
+ return PI;
}
// Include the platform-specific parts of this class.
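
With the change above, both the blocking and the non-blocking launch paths report results through a ProcessInfo value instead of an opaque void pointer. A short usage sketch of the blocking path, with the signature taken from this diff (header name and error handling are illustrative):

    #include "llvm/Support/Program.h"
    #include <string>

    // Run "Tool --version", waiting at most 10 seconds for it to finish.
    int runTool(const char *Tool) {
      const char *Args[] = {Tool, "--version", nullptr};
      std::string Err;
      bool Failed = false;
      int RC = llvm::sys::ExecuteAndWait(Tool, Args, /*envp=*/nullptr,
                                         /*redirects=*/nullptr,
                                         /*secondsToWait=*/10,
                                         /*memoryLimit=*/0, &Err, &Failed);
      if (Failed)
        return -1; // could not even start the process; Err has the reason
      return RC;   // the child's exit code (or -1/-2 on wait errors/timeouts)
    }
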
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index dec967e..5413641 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -43,7 +43,7 @@ bool Regex::isValid(std::string &Error) {
size_t len = llvm_regerror(error, preg, NULL, 0);
- Error.resize(len);
+ Error.resize(len - 1);
llvm_regerror(error, preg, &Error[0], len);
return false;
}
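
The one-line fix above accounts for llvm_regerror() reporting the required buffer size *including* the terminating NUL, so the std::string should hold one character fewer. The same two-call sizing pattern with the POSIX regerror(3), as a sketch:

    #include <regex.h>
    #include <string>

    // Build an error string for a failed regcomp()/regexec() call.
    std::string regexErrorString(int Error, const regex_t *Preg) {
      size_t Len = regerror(Error, Preg, nullptr, 0); // length includes the NUL
      if (Len == 0)
        return std::string();
      std::string Msg(Len, '\0');
      regerror(Error, Preg, &Msg[0], Len);
      Msg.resize(Len - 1);                            // drop the embedded NUL
      return Msg;
    }
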
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index f0fed77..dd417b4 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -202,8 +202,13 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
} else if (CurArraySize != RHS.CurArraySize) {
if (isSmall())
CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize);
- else
- CurArray = (const void**)realloc(CurArray, sizeof(void*)*RHS.CurArraySize);
+ else {
+ const void **T = (const void**)realloc(CurArray,
+ sizeof(void*) * RHS.CurArraySize);
+ if (!T)
+ free(CurArray);
+ CurArray = T;
+ }
assert(CurArray && "Failed to allocate memory?");
}
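
The change above is the standard fix for the classic `p = realloc(p, n)` leak: on failure realloc returns null and leaves the old block allocated, so assigning the result straight back loses the only pointer to it (the patch frees the old block because it is about to assert anyway). A minimal sketch of the more common form of the idiom, where the caller keeps the old block on failure:

    #include <cstdlib>

    // Grow an array of pointers without leaking the old block on failure.
    // Returns false (and leaves *Buf untouched) if allocation fails.
    static bool growPointerArray(const void ***Buf, size_t NewCapacity) {
      const void **T =
          (const void **)std::realloc(*Buf, sizeof(void *) * NewCapacity);
      if (!T)
        return false;   // *Buf is still valid and still owned by the caller
      *Buf = T;
      return true;
    }
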
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 51162dd..d4b94f8 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -211,7 +211,8 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
LineStr, ColRanges, FixIts);
}
-void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
+void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
+ SourceMgr::DiagKind Kind,
const Twine &Msg, ArrayRef<SMRange> Ranges,
ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts);
@@ -222,8 +223,6 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
return;
}
- raw_ostream &OS = errs();
-
if (Loc != SMLoc()) {
int CurBuf = FindBufferContainingLoc(Loc);
assert(CurBuf != -1 && "Invalid or unspecified location!");
@@ -233,6 +232,12 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
Diagnostic.print(0, OS, ShowColors);
}
+void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
+ const Twine &Msg, ArrayRef<SMRange> Ranges,
+ ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
+ PrintMessage(llvm::errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
+}
+
//===----------------------------------------------------------------------===//
// SMDiagnostic Implementation
//===----------------------------------------------------------------------===//
@@ -465,7 +470,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S,
if (FixItInsertionLine.empty())
return;
- for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i != e; ++i) {
+ for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
if (i >= LineContents.size() || LineContents[i] != '\t') {
S << FixItInsertionLine[i];
++OutCol;
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index d7a0bfa..bfae754 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -37,20 +37,39 @@ static bool ascii_isdigit(char x) {
return x >= '0' && x <= '9';
}
-/// compare_lower - Compare strings, ignoring case.
-int StringRef::compare_lower(StringRef RHS) const {
- for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
- unsigned char LHC = ascii_tolower(Data[I]);
- unsigned char RHC = ascii_tolower(RHS.Data[I]);
+// strncasecmp() is not available on non-POSIX systems, so define an
+// alternative function here.
+static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
+ for (size_t I = 0; I < Length; ++I) {
+ unsigned char LHC = ascii_tolower(LHS[I]);
+ unsigned char RHC = ascii_tolower(RHS[I]);
if (LHC != RHC)
return LHC < RHC ? -1 : 1;
}
+ return 0;
+}
+/// compare_lower - Compare strings, ignoring case.
+int StringRef::compare_lower(StringRef RHS) const {
+ if (int Res = ascii_strncasecmp(Data, RHS.Data, min(Length, RHS.Length)))
+ return Res;
if (Length == RHS.Length)
return 0;
return Length < RHS.Length ? -1 : 1;
}
+/// Check if this string starts with the given \p Prefix, ignoring case.
+bool StringRef::startswith_lower(StringRef Prefix) const {
+ return Length >= Prefix.Length &&
+ ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
+}
+
+/// Check if this string ends with the given \p Suffix, ignoring case.
+bool StringRef::endswith_lower(StringRef Suffix) const {
+ return Length >= Suffix.Length &&
+ ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
+}
+
/// compare_numeric - Compare strings, handle embedded numbers.
int StringRef::compare_numeric(StringRef RHS) const {
for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
@@ -85,7 +104,7 @@ int StringRef::compare_numeric(StringRef RHS) const {
// Compute the edit distance between the two given strings.
unsigned StringRef::edit_distance(llvm::StringRef Other,
bool AllowReplacements,
- unsigned MaxEditDistance) {
+ unsigned MaxEditDistance) const {
return llvm::ComputeEditDistance(
llvm::ArrayRef<char>(data(), size()),
llvm::ArrayRef<char>(Other.data(), Other.size()),
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index 9c81327..0c90c17 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -135,9 +135,9 @@ const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
return TheTarget;
}
-static int TargetArraySortFn(const void *LHS, const void *RHS) {
- typedef std::pair<StringRef, const Target*> pair_ty;
- return ((const pair_ty*)LHS)->first.compare(((const pair_ty*)RHS)->first);
+static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS,
+ const std::pair<StringRef, const Target *> *RHS) {
+ return LHS->first.compare(RHS->first);
}
void TargetRegistry::printRegisteredTargetsForVersion() {
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index 0587aae..868b6ea 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -23,7 +23,7 @@
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
-ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::ThreadLocalImpl() : data() { }
ThreadLocalImpl::~ThreadLocalImpl() { }
void ThreadLocalImpl::setInstance(const void* d) {
typedef int SIZE_TOO_BIG[sizeof(d) <= sizeof(data) ? 1 : -1];
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index d0d0e14..6c978a0 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -221,7 +221,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Cases("i386", "i486", "i586", "i686", Triple::x86)
// FIXME: Do we need to support these?
.Cases("i786", "i886", "i986", Triple::x86)
- .Cases("amd64", "x86_64", Triple::x86_64)
+ .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64)
.Case("powerpc", Triple::ppc)
.Cases("powerpc64", "ppu", Triple::ppc64)
.Case("powerpc64le", Triple::ppc64le)
diff --git a/lib/Support/Unicode.cpp b/lib/Support/Unicode.cpp
new file mode 100644
index 0000000..b719bd8
--- /dev/null
+++ b/lib/Support/Unicode.cpp
@@ -0,0 +1,367 @@
+//===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements functions that allow querying certain properties of
+// Unicode characters.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Unicode.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/UnicodeCharRanges.h"
+
+namespace llvm {
+namespace sys {
+namespace unicode {
+
+bool isPrintable(int UCS) {
+ // Sorted list of non-overlapping intervals of code points that are not
+ // supposed to be printable.
+ static const UnicodeCharRange NonPrintableRanges[] = {
+ { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F },
+ { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B },
+ { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 },
+ { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 },
+ { 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF },
+ { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D },
+ { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C },
+ { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F },
+ { 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F },
+ { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF },
+ { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 },
+ { 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 },
+ { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB },
+ { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 },
+ { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 },
+ { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E },
+ { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 },
+ { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B },
+ { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A },
+ { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D },
+ { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 },
+ { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 },
+ { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB },
+ { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF },
+ { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 0x0B00 },
+ { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 },
+ { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 },
+ { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A },
+ { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E },
+ { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 },
+ { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 },
+ { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 },
+ { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD },
+ { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF },
+ { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 },
+ { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 0x0C11 },
+ { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C },
+ { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 },
+ { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 },
+ { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 },
+ { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 },
+ { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 },
+ { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD },
+ { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 },
+ { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D },
+ { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 },
+ { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F },
+ { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 },
+ { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 },
+ { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 },
+ { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 },
+ { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E },
+ { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 },
+ { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 },
+ { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 },
+ { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC },
+ { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 },
+ { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB },
+ { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 },
+ { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD },
+ { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC },
+ { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 },
+ { 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 },
+ { 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F },
+ { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF },
+ { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 },
+ { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C },
+ { 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF },
+ { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D },
+ { 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F },
+ { 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F },
+ { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF },
+ { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F },
+ { 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF },
+ { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F },
+ { 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F },
+ { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF },
+ { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F },
+ { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F },
+ { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F },
+ { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C },
+ { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF },
+ { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F },
+ { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 },
+ { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E },
+ { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 },
+ { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 },
+ { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F },
+ { 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 },
+ { 0x208F, 0x208F }, { 0x209D, 0x209F }, { 0x20BB, 0x20CF },
+ { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF },
+ { 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 },
+ { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F },
+ { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 },
+ { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E },
+ { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 },
+ { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF },
+ { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 },
+ { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A },
+ { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
+ { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 },
+ { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F },
+ { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F },
+ { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF },
+ { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F },
+ { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F },
+ { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F },
+ { 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD },
+ { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E },
+ { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD },
+ { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F },
+ { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA },
+ { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 },
+ { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF },
+ { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF },
+ { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F },
+ { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C },
+ { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F },
+ { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 },
+ { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF },
+ { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F },
+ { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, { 0xFE6C, 0xFE6F },
+ { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 },
+ { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 },
+ { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF },
+ { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF },
+ { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B },
+ { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F },
+ { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 },
+ { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F },
+ { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F },
+ { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E },
+ { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F },
+ { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 },
+ { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E },
+ { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E },
+ { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD },
+ { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B },
+ { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 },
+ { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F },
+ { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 },
+ { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F },
+ { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F },
+ { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF },
+ { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F },
+ { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF },
+ { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F },
+ { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF },
+ { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF },
+ { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 },
+ { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF },
+ { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 },
+ { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 },
+ { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA },
+ { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 },
+ { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D },
+ { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 },
+ { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 },
+ { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 },
+ { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 },
+ { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 },
+ { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 },
+ { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C },
+ { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 },
+ { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C },
+ { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 },
+ { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 },
+ { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F },
+ { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 },
+ { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF },
+ { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 },
+ { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF },
+ { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F },
+ { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F },
+ { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F },
+ { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F },
+ { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF },
+ { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 },
+ { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F },
+ { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F },
+ { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 0x2A6D7, 0x2A6FF },
+ { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 },
+ { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF }
+ };
+ static const UnicodeCharSet NonPrintables(NonPrintableRanges);
+
+ return UCS >= 0 && UCS <= 0x10FFFF && !NonPrintables.contains(UCS);
+}
+
+/// Gets the number of positions a character is likely to occupy when output
+/// on a terminal ("character width"). This depends on the implementation of the
+/// terminal, and there's no standard definition of character width.
+/// The implementation defines it in a way that is expected to be compatible
+/// with a generic Unicode-capable terminal.
+/// \return Character width:
+/// * ErrorNonPrintableCharacter (-1) for non-printable characters (as
+/// identified by isPrintable);
+/// * 0 for non-spacing and enclosing combining marks;
+/// * 2 for CJK characters excluding halfwidth forms;
+/// * 1 for all remaining characters.
+static inline int charWidth(int UCS)
+{
+ if (!isPrintable(UCS))
+ return ErrorNonPrintableCharacter;
+
+ // Sorted list of non-spacing and enclosing combining mark intervals as
+ // defined in "3.6 Combination" of
+ // http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
+ static const UnicodeCharRange CombiningCharacterRanges[] = {
+ { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD },
+ { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 },
+ { 0x05C7, 0x05C7 }, { 0x0610, 0x061A }, { 0x064B, 0x065F },
+ { 0x0670, 0x0670 }, { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 },
+ { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x0711, 0x0711 },
+ { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 },
+ { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 },
+ { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08E4, 0x08FE },
+ { 0x0900, 0x0902 }, { 0x093A, 0x093A }, { 0x093C, 0x093C },
+ { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0957 },
+ { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC },
+ { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 },
+ { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 },
+ { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 },
+ { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 },
+ { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
+ { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 },
+ { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 },
+ { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B62, 0x0B63 },
+ { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD },
+ { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D },
+ { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 }, { 0x0CBC, 0x0CBC },
+ { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
+ { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D },
+ { 0x0D62, 0x0D63 }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
+ { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
+ { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
+ { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
+ { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
+ { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
+ { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
+ { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A },
+ { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 },
+ { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 },
+ { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x135D, 0x135F },
+ { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
+ { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
+ { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
+ { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
+ { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
+ { 0x1A17, 0x1A18 }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E },
+ { 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C },
+ { 0x1A73, 0x1A7C }, { 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 },
+ { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C },
+ { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 },
+ { 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAB },
+ { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED },
+ { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 },
+ { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 },
+ { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1DC0, 0x1DE6 },
+ { 0x1DFC, 0x1DFF }, { 0x20D0, 0x20F0 }, { 0x2CEF, 0x2CF1 },
+ { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D },
+ { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D },
+ { 0xA69F, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 },
+ { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 },
+ { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 }, { 0xA926, 0xA92D },
+ { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 },
+ { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC }, { 0xAA29, 0xAA2E },
+ { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 },
+ { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 },
+ { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 },
+ { 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 }, { 0xABE5, 0xABE5 },
+ { 0xABE8, 0xABE8 }, { 0xABED, 0xABED }, { 0xFB1E, 0xFB1E },
+ { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 }, { 0x101FD, 0x101FD },
+ { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
+ { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x11001, 0x11001 },
+ { 0x11038, 0x11046 }, { 0x11080, 0x11081 }, { 0x110B3, 0x110B6 },
+ { 0x110B9, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x1112B },
+ { 0x1112D, 0x11134 }, { 0x11180, 0x11181 }, { 0x111B6, 0x111BE },
+ { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD }, { 0x116B0, 0x116B5 },
+ { 0x116B7, 0x116B7 }, { 0x16F8F, 0x16F92 }, { 0x1D167, 0x1D169 },
+ { 0x1D17B, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
+ { 0x1D242, 0x1D244 }, { 0xE0100, 0xE01EF },
+ };
+ static const UnicodeCharSet CombiningCharacters(CombiningCharacterRanges);
+
+ if (CombiningCharacters.contains(UCS))
+ return 0;
+
+ static const UnicodeCharRange DoubleWidthCharacterRanges[] = {
+ // Hangul Jamo
+ { 0x1100, 0x11FF },
+ // Deprecated fullwidth angle brackets
+ { 0x2329, 0x232A },
+ // CJK Misc, CJK Unified Ideographs, Yijing Hexagrams, Yi
+ // excluding U+303F (IDEOGRAPHIC HALF FILL SPACE)
+ { 0x2E80, 0x303E }, { 0x3040, 0xA4CF },
+ // Hangul
+ { 0xAC00, 0xD7A3 }, { 0xD7B0, 0xD7C6 }, { 0xD7CB, 0xD7FB },
+ // CJK Unified Ideographs
+ { 0xF900, 0xFAFF },
+ // Vertical forms
+ { 0xFE10, 0xFE19 },
+ // CJK Compatibility Forms + Small Form Variants
+ { 0xFE30, 0xFE6F },
+ // Fullwidth forms
+ { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 },
+ // CJK Unified Ideographs
+ { 0x20000, 0x2A6DF }, { 0x2A700, 0x2B81F }, { 0x2F800, 0x2FA1F }
+ };
+ static const UnicodeCharSet DoubleWidthCharacters(DoubleWidthCharacterRanges);
+
+ if (DoubleWidthCharacters.contains(UCS))
+ return 2;
+ return 1;
+}
+
+int columnWidthUTF8(StringRef Text) {
+ unsigned ColumnWidth = 0;
+ unsigned Length;
+ for (size_t i = 0, e = Text.size(); i < e; i += Length) {
+ Length = getNumBytesForUTF8(Text[i]);
+ if (Length <= 0 || i + Length > Text.size())
+ return ErrorInvalidUTF8;
+ UTF32 buf[1];
+ const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i);
+ UTF32 *Target = &buf[0];
+ if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target,
+ Target + 1, strictConversion))
+ return ErrorInvalidUTF8;
+ int Width = charWidth(buf[0]);
+ if (Width < 0)
+ return ErrorNonPrintableCharacter;
+ ColumnWidth += Width;
+ }
+ return ColumnWidth;
+}
+
+} // namespace unicode
+} // namespace sys
+} // namespace llvm
+
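
Both tables in the new file are sorted, non-overlapping code point intervals, and UnicodeCharSet::contains reduces to a binary search over them. A standalone sketch of that lookup; the struct and function names below are illustrative, not LLVM's UnicodeCharRanges API.

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    struct CharRange {
      uint32_t Lower; // inclusive
      uint32_t Upper; // inclusive
    };

    // True if Cp falls inside one of the sorted, non-overlapping Ranges.
    static bool rangesContain(const CharRange *Ranges, size_t N, uint32_t Cp) {
      // Find the first range whose upper bound is >= Cp ...
      const CharRange *It = std::lower_bound(
          Ranges, Ranges + N, Cp,
          [](const CharRange &R, uint32_t C) { return R.Upper < C; });
      // ... and check that its lower bound does not exceed Cp.
      return It != Ranges + N && It->Lower <= Cp;
    }

    int main() {
      static const CharRange Combining[] = {{0x0300, 0x036F}, {0x0483, 0x0489}};
      size_t N = sizeof(Combining) / sizeof(Combining[0]);
      return rangesContain(Combining, N, 0x0301) ? 0 : 1; // COMBINING ACUTE ACCENT
    }
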
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 4dcfa09..c9dc871 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -182,7 +182,7 @@ namespace sys {
namespace fs {
#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \
- defined(__linux__) || defined(__CYGWIN__)
+ defined(__linux__) || defined(__CYGWIN__) || defined(__DragonFly__)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
const char *dir, const char *bin)
@@ -251,7 +251,8 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
return link_path;
}
#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
- defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
+ defined(__OpenBSD__) || defined(__minix) || defined(__DragonFly__) || \
+ defined(__FreeBSD_kernel__)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
@@ -298,6 +299,18 @@ UniqueID file_status::getUniqueID() const {
}
error_code current_path(SmallVectorImpl<char> &result) {
+ result.clear();
+
+ const char *pwd = ::getenv("PWD");
+ llvm::sys::fs::file_status PWDStatus, DotStatus;
+ if (pwd && llvm::sys::path::is_absolute(pwd) &&
+ !llvm::sys::fs::status(pwd, PWDStatus) &&
+ !llvm::sys::fs::status(".", DotStatus) &&
+ PWDStatus.getUniqueID() == DotStatus.getUniqueID()) {
+ result.append(pwd, pwd + strlen(pwd));
+ return error_code::success();
+ }
+
#ifdef MAXPATHLEN
result.reserve(MAXPATHLEN);
#else
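
The added block prefers $PWD over getcwd() so that paths keep whatever symlinked spelling the user navigated through, but only after checking that $PWD really still names the current directory. The same check expressed directly with POSIX stat(2), as a sketch:

    #include <cstdlib>
    #include <string>
    #include <sys/stat.h>

    // Return $PWD if it is an absolute path naming the same directory as ".",
    // otherwise an empty string (the caller falls back to getcwd()).
    static std::string preferredCwd() {
      const char *Pwd = ::getenv("PWD");
      if (!Pwd || Pwd[0] != '/')
        return std::string();
      struct stat PwdStat, DotStat;
      if (::stat(Pwd, &PwdStat) != 0 || ::stat(".", &DotStat) != 0)
        return std::string();
      // Same device and inode => same directory, even through symlinks.
      if (PwdStat.st_dev == DotStat.st_dev && PwdStat.st_ino == DotStat.st_ino)
        return std::string(Pwd);
      return std::string();
    }
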
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 0a797f6..c5778e7 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -13,6 +13,7 @@
#include "Unix.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/TimeValue.h"
@@ -38,25 +39,6 @@
# include <termios.h>
#endif
-// See if we can use curses to detect information about a terminal when
-// connected to one.
-#ifdef HAVE_CURSES
-# if defined(HAVE_CURSES_H)
-# include <curses.h>
-# elif defined(HAVE_NCURSES_H)
-# include <ncurses.h>
-# elif defined(HAVE_NCURSESW_H)
-# include <ncursesw.h>
-# elif defined(HAVE_NCURSES_CURSES_H)
-# include <ncurses/curses.h>
-# elif defined(HAVE_NCURSESW_CURSES_H)
-# include <ncursesw/curses.h>
-# else
-# error Have a curses library but unable to find a curses header!
-# endif
-# include <term.h>
-#endif
-
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only generic UNIX code that
//=== is guaranteed to work on *all* UNIX variants.
@@ -107,13 +89,10 @@ TimeValue self_process::get_system_time() const {
return getRUsageTimes().second;
}
+// On Cygwin, getpagesize() returns 64k (the AllocationGranularity), and the
+// offset passed to mmap(3) must be aligned to that AllocationGranularity.
static unsigned getPageSize() {
-#if defined(__CYGWIN__)
- // On Cygwin, getpagesize() returns 64k but the page size for the purposes of
- // memory protection and mmap() is 4k.
- // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
- const int page_size = 0x1000;
-#elif defined(HAVE_GETPAGESIZE)
+#if defined(HAVE_GETPAGESIZE)
const int page_size = ::getpagesize();
#elif defined(HAVE_SYSCONF)
long page_size = ::sysconf(_SC_PAGE_SIZE);
@@ -159,14 +138,6 @@ void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
llvm::tie(user_time, sys_time) = getRUsageTimes();
}
-int Process::GetCurrentUserId() {
- return getuid();
-}
-
-int Process::GetCurrentGroupId() {
- return getgid();
-}
-
#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__)
#include <mach/mach.h>
#endif
@@ -211,6 +182,22 @@ void Process::PreventCoreFiles() {
#endif
}
+Optional<std::string> Process::GetEnv(StringRef Name) {
+ std::string NameStr = Name.str();
+ const char *Val = ::getenv(NameStr.c_str());
+ if (!Val)
+ return None;
+ return std::string(Val);
+}
+
+error_code Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut,
+ ArrayRef<const char *> ArgsIn,
+ SpecificBumpPtrAllocator<char> &) {
+ ArgsOut.append(ArgsIn.begin(), ArgsIn.end());
+
+ return error_code::success();
+}
+
bool Process::StandardInIsUserInput() {
return FileDescriptorIsDisplayed(STDIN_FILENO);
}
@@ -266,21 +253,50 @@ unsigned Process::StandardErrColumns() {
return getColumns(2);
}
+#ifdef HAVE_TERMINFO
+// We manually declare these extern functions because finding the correct
+// headers from various terminfo, curses, or other sources is harder than
+// writing their specs down.
+extern "C" int setupterm(char *term, int filedes, int *errret);
+extern "C" struct term *set_curterm(struct term *termp);
+extern "C" int del_curterm(struct term *termp);
+extern "C" int tigetnum(char *capname);
+#endif
+
static bool terminalHasColors(int fd) {
-#ifdef HAVE_CURSES
- // First, acquire a global lock because the curses C routines are thread
- // hostile.
+#ifdef HAVE_TERMINFO
+ // First, acquire a global lock because these C routines are thread hostile.
static sys::Mutex M;
MutexGuard G(M);
int errret = 0;
- if (setupterm((char *)0, fd, &errret) != OK)
+ if (setupterm((char *)0, fd, &errret) != 0)
// Regardless of why, if we can't get terminfo, we shouldn't try to print
// colors.
return false;
- // Test whether the terminal as set up supports color output.
- if (has_colors() == TRUE)
+ // Test whether the terminal as set up supports color output. How to do this
+ // isn't entirely obvious. We can use the curses routine 'has_colors' but it
+ // would be nice to avoid a dependency on curses proper when we can make do
+ // with a minimal terminfo parsing library. Also, we don't really care whether
+ // the terminal supports the curses-specific color changing routines, merely
+ // if it will interpret ANSI color escape codes in a reasonable way. Thus, the
+ // strategy here is just to query the baseline colors capability and if it
+ // supports colors at all to assume it will translate the escape codes into
+ // whatever range of colors it does support. We can add more detailed tests
+ // here if users report them as necessary.
+ //
+ // The 'tigetnum' routine returns -2 or -1 on errors, and might return 0 if
+ // the terminfo says that no colors are supported.
+ bool HasColors = tigetnum(const_cast<char *>("colors")) > 0;
+
+ // Now extract the structure allocated by setupterm and free its memory
+ // through a really silly dance.
+ struct term *termp = set_curterm((struct term *)0);
+ (void)del_curterm(termp); // Drop any errors here.
+
+  // Return true if we found a color capability for the current terminal.
+ if (HasColors)
return true;
#endif
@@ -302,29 +318,15 @@ bool Process::StandardErrHasColors() {
return FileDescriptorHasColors(STDERR_FILENO);
}
+void Process::UseANSIEscapeCodes(bool /*enable*/) {
+ // No effect.
+}
+
bool Process::ColorNeedsFlush() {
// No, we use ANSI escape sequences.
return false;
}
-#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
-
-#define ALLCOLORS(FGBG,BOLD) {\
- COLOR(FGBG, "0", BOLD),\
- COLOR(FGBG, "1", BOLD),\
- COLOR(FGBG, "2", BOLD),\
- COLOR(FGBG, "3", BOLD),\
- COLOR(FGBG, "4", BOLD),\
- COLOR(FGBG, "5", BOLD),\
- COLOR(FGBG, "6", BOLD),\
- COLOR(FGBG, "7", BOLD)\
- }
-
-static const char colorcodes[2][2][8][10] = {
- { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
- { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
-};
-
const char *Process::OutputColor(char code, bool bold, bool bg) {
return colorcodes[bg?1:0][bold?1:0][code&7];
}
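
The rewritten terminalHasColors() above drops the full curses dependency and asks terminfo directly for the baseline "colors" capability. A standalone sketch of that query using the same four entry points the patch declares by hand; it assumes a terminfo implementation is linked in (e.g. -ltinfo or -lcurses) and, unlike the patched code, takes no lock, so it is not thread-safe.

    #include <unistd.h>

    extern "C" int setupterm(char *term, int filedes, int *errret);
    extern "C" struct term *set_curterm(struct term *termp);
    extern "C" int del_curterm(struct term *termp);
    extern "C" int tigetnum(char *capname);

    // True if the terminal on FD advertises at least one color.
    static bool fdSupportsColors(int FD) {
      if (!isatty(FD))
        return false;
      int ErrRet = 0;
      if (setupterm(nullptr, FD, &ErrRet) != 0)
        return false;
      // tigetnum returns -2/-1 on error and may return 0 for "no colors".
      bool HasColors = tigetnum(const_cast<char *>("colors")) > 0;
      // Release the terminal description that setupterm allocated.
      del_curterm(set_curterm(nullptr));
      return HasColors;
    }
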
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index a93a912..78b2971 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -36,6 +36,9 @@
#include <unistd.h>
#endif
#ifdef HAVE_POSIX_SPAWN
+#ifdef __sun__
+#define _RESTRICT_KYWD
+#endif
#include <spawn.h>
#if !defined(__APPLE__)
extern char **environ;
@@ -47,6 +50,8 @@
namespace llvm {
using namespace sys;
+ProcessInfo::ProcessInfo() : Pid(0), ReturnCode(0) {}
+
// This function just uses the PATH environment variable to find the program.
std::string
sys::FindProgramByName(const std::string& progName) {
@@ -175,9 +180,16 @@ static void SetMemoryLimits (unsigned size)
}
-static bool Execute(void **Data, StringRef Program, const char **args,
+static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
const char **envp, const StringRef **redirects,
unsigned memoryLimit, std::string *ErrMsg) {
+ if (!llvm::sys::fs::exists(Program)) {
+ if (ErrMsg)
+ *ErrMsg = std::string("Executable \"") + Program.str() +
+ std::string("\" doesn't exist!");
+ return false;
+ }
+
// If this OS has posix_spawn and there is no memory limit being implied, use
// posix_spawn. It is more efficient than fork/exec.
#ifdef HAVE_POSIX_SPAWN
@@ -239,8 +251,8 @@ static bool Execute(void **Data, StringRef Program, const char **args,
if (Err)
return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
- if (Data)
- *Data = reinterpret_cast<void*>(PID);
+ PI.Pid = PID;
+
return true;
}
#endif
@@ -303,56 +315,71 @@ static bool Execute(void **Data, StringRef Program, const char **args,
break;
}
- if (Data)
- *Data = reinterpret_cast<void*>(child);
+ PI.Pid = child;
return true;
}
-static int Wait(void *&Data, StringRef Program, unsigned secondsToWait,
- std::string *ErrMsg) {
+namespace llvm {
+
+ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
+ bool WaitUntilTerminates, std::string *ErrMsg) {
#ifdef HAVE_SYS_WAIT_H
struct sigaction Act, Old;
- assert(Data && "invalid pid to wait on, process not started?");
-
- // Install a timeout handler. The handler itself does nothing, but the simple
- // fact of having a handler at all causes the wait below to return with EINTR,
- // unlike if we used SIG_IGN.
- if (secondsToWait) {
+ assert(PI.Pid && "invalid pid to wait on, process not started?");
+
+ int WaitPidOptions = 0;
+ pid_t ChildPid = PI.Pid;
+ if (WaitUntilTerminates) {
+ SecondsToWait = 0;
+ ChildPid = -1; // mimic a wait() using waitpid()
+ } else if (SecondsToWait) {
+ // Install a timeout handler. The handler itself does nothing, but the
+ // simple fact of having a handler at all causes the wait below to return
+ // with EINTR, unlike if we used SIG_IGN.
memset(&Act, 0, sizeof(Act));
Act.sa_handler = TimeOutHandler;
sigemptyset(&Act.sa_mask);
sigaction(SIGALRM, &Act, &Old);
- alarm(secondsToWait);
- }
+ alarm(SecondsToWait);
+ } else if (SecondsToWait == 0)
+ WaitPidOptions = WNOHANG;
// Parent process: Wait for the child process to terminate.
int status;
- uint64_t pid = reinterpret_cast<uint64_t>(Data);
- pid_t child = static_cast<pid_t>(pid);
- while (waitpid(pid, &status, 0) != child)
- if (secondsToWait && errno == EINTR) {
- // Kill the child.
- kill(child, SIGKILL);
-
- // Turn off the alarm and restore the signal handler
- alarm(0);
- sigaction(SIGALRM, &Old, 0);
-
- // Wait for child to die
- if (wait(&status) != child)
- MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
- else
- MakeErrMsg(ErrMsg, "Child timed out", 0);
-
- return -2; // Timeout detected
- } else if (errno != EINTR) {
- MakeErrMsg(ErrMsg, "Error waiting for child process");
- return -1;
+ ProcessInfo WaitResult;
+ WaitResult.Pid = waitpid(ChildPid, &status, WaitPidOptions);
+ if (WaitResult.Pid != PI.Pid) {
+ if (WaitResult.Pid == 0) {
+ // Non-blocking wait.
+ return WaitResult;
+ } else {
+ if (SecondsToWait && errno == EINTR) {
+ // Kill the child.
+ kill(PI.Pid, SIGKILL);
+
+ // Turn off the alarm and restore the signal handler
+ alarm(0);
+ sigaction(SIGALRM, &Old, 0);
+
+ // Wait for child to die
+ if (wait(&status) != ChildPid)
+ MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
+ else
+ MakeErrMsg(ErrMsg, "Child timed out", 0);
+
+ WaitResult.ReturnCode = -2; // Timeout detected
+ return WaitResult;
+ } else if (errno != EINTR) {
+ MakeErrMsg(ErrMsg, "Error waiting for child process");
+ WaitResult.ReturnCode = -1;
+ return WaitResult;
+ }
}
+ }
// We exited normally without timeout, so turn off the timer.
- if (secondsToWait) {
+ if (SecondsToWait && !WaitUntilTerminates) {
alarm(0);
sigaction(SIGALRM, &Old, 0);
}
@@ -362,24 +389,19 @@ static int Wait(void *&Data, StringRef Program, unsigned secondsToWait,
int result = 0;
if (WIFEXITED(status)) {
result = WEXITSTATUS(status);
-#ifdef HAVE_POSIX_SPAWN
- // The posix_spawn child process returns 127 on any kind of error.
- // Following the POSIX convention for command-line tools (which posix_spawn
- // itself apparently does not), check to see if the failure was due to some
- // reason other than the file not existing, and return 126 in this case.
- bool Exists;
- if (result == 127 && !llvm::sys::fs::exists(Program, Exists) && Exists)
- result = 126;
-#endif
+ WaitResult.ReturnCode = result;
+
if (result == 127) {
if (ErrMsg)
*ErrMsg = llvm::sys::StrError(ENOENT);
- return -1;
+ WaitResult.ReturnCode = -1;
+ return WaitResult;
}
if (result == 126) {
if (ErrMsg)
*ErrMsg = "Program could not be executed";
- return -1;
+ WaitResult.ReturnCode = -1;
+ return WaitResult;
}
} else if (WIFSIGNALED(status)) {
if (ErrMsg) {
@@ -391,18 +413,16 @@ static int Wait(void *&Data, StringRef Program, unsigned secondsToWait,
}
// Return a special value to indicate that the process received an unhandled
// signal during execution as opposed to failing to execute.
- return -2;
+ WaitResult.ReturnCode = -2;
}
- return result;
#else
if (ErrMsg)
*ErrMsg = "Program::Wait is not implemented on this platform yet!";
- return -1;
+ WaitResult.ReturnCode = -2;
#endif
+ return WaitResult;
}
-namespace llvm {
-
error_code sys::ChangeStdinToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
return make_error_code(errc::success);
@@ -438,5 +458,4 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
}
return true;
}
-
}
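
The rewritten Wait() above covers three modes: block until the child exits, poll with WNOHANG, or a bounded wait implemented by installing a do-nothing SIGALRM handler so that waitpid() returns with EINTR when the alarm fires. A standalone sketch of just the bounded-wait idiom (simplified: any EINTR during the wait is treated as the timeout, as in the patch):

    #include <cstring>
    #include <errno.h>
    #include <signal.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static void onAlarm(int) {} // exists only so waitpid() returns with EINTR

    // Wait up to Seconds for Child to exit; kill it on timeout.
    // Returns the exit status, or -2 if the timeout fired, or -1 on error.
    static int waitWithTimeout(pid_t Child, unsigned Seconds) {
      struct sigaction Act, Old;
      std::memset(&Act, 0, sizeof(Act));
      Act.sa_handler = onAlarm;
      sigemptyset(&Act.sa_mask);
      sigaction(SIGALRM, &Act, &Old);
      alarm(Seconds);

      int Status = 0;
      int Result = -1;
      for (;;) {
        pid_t Ret = waitpid(Child, &Status, 0);
        if (Ret == Child) {
          Result = WIFEXITED(Status) ? WEXITSTATUS(Status) : -1;
          break;
        }
        if (Ret < 0 && errno == EINTR) { // the alarm fired: give up on the child
          kill(Child, SIGKILL);
          waitpid(Child, &Status, 0);    // reap the killed child
          Result = -2;
          break;
        }
        if (Ret < 0)                     // some other error; stop waiting
          break;
      }

      alarm(0);                          // cancel the timer, restore the handler
      sigaction(SIGALRM, &Old, nullptr);
      return Result;
    }
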
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 800a6a7..13ae862 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -333,7 +333,7 @@ static void PrintStackTraceSignalHandler(void *) {
void llvm::sys::PrintStackTraceOnErrorSignal() {
AddSignalHandler(PrintStackTraceSignalHandler, 0);
-#if defined(__APPLE__) && !defined(ANDROID)
+#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES)
// Environment variable to disable any kind of crash dialog.
if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
mach_port_t self = mach_task_self();
@@ -359,7 +359,7 @@ void llvm::sys::PrintStackTraceOnErrorSignal() {
// the same linkage unit by just defining our own versions of the assert handler
// and abort.
-#if defined(__APPLE__) && !defined(ANDROID)
+#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES)
#include <signal.h>
#include <pthread.h>
diff --git a/lib/Support/Unix/ThreadLocal.inc b/lib/Support/Unix/ThreadLocal.inc
index 2b4c901..f14d0fa 100644
--- a/lib/Support/Unix/ThreadLocal.inc
+++ b/lib/Support/Unix/ThreadLocal.inc
@@ -18,7 +18,7 @@
namespace llvm {
using namespace sys;
-ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::ThreadLocalImpl() : data() { }
ThreadLocalImpl::~ThreadLocalImpl() { }
void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
const void* ThreadLocalImpl::getInstance() { return data; }
diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h
index dd11c04..ba688e3 100644
--- a/lib/Support/Unix/Unix.h
+++ b/lib/Support/Unix/Unix.h
@@ -47,6 +47,10 @@
# include <sys/wait.h>
#endif
+#ifdef HAVE_DLFCN_H
+# include <dlfcn.h>
+#endif
+
#ifndef WEXITSTATUS
# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
#endif
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 83da82a..5a7b219 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -71,7 +71,7 @@ extern "C" {
DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
std::string *errMsg) {
- SmartScopedLock<true> lock(getMutex());
+ SmartScopedLock<true> lock(*SymbolsMutex);
if (!filename) {
// When no file is specified, enumerate all DLLs and EXEs in the process.
@@ -83,8 +83,15 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
// This is mostly to ensure that the return value still shows up as "valid".
return DynamicLibrary(&OpenedHandles);
}
+
+ SmallVector<wchar_t, MAX_PATH> filenameUnicode;
+ if (error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) {
+ SetLastError(ec.value());
+ MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: ");
+ return DynamicLibrary();
+ }
- HMODULE a_handle = LoadLibrary(filename);
+ HMODULE a_handle = LoadLibraryW(filenameUnicode.data());
if (a_handle == 0) {
MakeErrMsg(errMsg, std::string(filename) + ": Can't open : ");
@@ -114,10 +121,10 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
#undef EXPLICIT_SYMBOL2
void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
- SmartScopedLock<true> Lock(getMutex());
+ SmartScopedLock<true> Lock(*SymbolsMutex);
// First check symbols added via AddSymbol().
- if (ExplicitSymbols) {
+ if (ExplicitSymbols.isConstructed()) {
StringMap<void *>::iterator i = ExplicitSymbols->find(symbolName);
if (i != ExplicitSymbols->end())
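
The Windows hunks above route narrow strings through an explicit UTF-8 to UTF-16 conversion before calling the wide-character Win32 APIs, instead of the ANSI variants that depend on the active code page. A standalone sketch of that pattern with MultiByteToWideChar and LoadLibraryW, standing in for LLVM's windows::UTF8ToUTF16 helper:

    #include <windows.h>
    #include <string>
    #include <vector>

    // Load a DLL given a UTF-8 path, going through the wide-char API so that
    // non-ANSI file names work regardless of the active code page.
    static HMODULE loadLibraryUTF8(const std::string &Utf8Path) {
      int Len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                      Utf8Path.c_str(), -1, nullptr, 0);
      if (Len == 0)
        return nullptr;                  // invalid UTF-8 or other failure
      std::vector<wchar_t> Wide(Len);    // Len includes the terminating NUL
      if (::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, Utf8Path.c_str(),
                                -1, Wide.data(), Len) == 0)
        return nullptr;
      return ::LoadLibraryW(Wide.data());
    }
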
diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc
index 4c5aebd..1260452 100644
--- a/lib/Support/Windows/Memory.inc
+++ b/lib/Support/Windows/Memory.inc
@@ -82,7 +82,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
NearBlock->size()
- : NULL;
+ : 0;
// If the requested address is not aligned to the allocation granularity,
// round up to get beyond NearBlock. VirtualAlloc would have rounded down.
@@ -106,7 +106,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
MemoryBlock Result;
Result.Address = PA;
Result.Size = NumBlocks*Granularity;
- ;
+
if (Flags & MF_EXEC)
Memory::InvalidateInstructionCache(Result.Address, Result.Size);
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 1694cb2..0b39198 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -37,70 +37,18 @@ typedef int errno_t;
using namespace llvm;
+using llvm::sys::windows::UTF8ToUTF16;
+using llvm::sys::windows::UTF16ToUTF8;
+
namespace {
typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
/*__in*/ LPCWSTR lpSymlinkFileName,
/*__in*/ LPCWSTR lpTargetFileName,
/*__in*/ DWORD dwFlags);
- PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW(
- ::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
- "CreateSymbolicLinkW"));
-
- error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
- int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
- utf8.begin(), utf8.size(),
- utf16.begin(), 0);
-
- if (len == 0)
- return windows_error(::GetLastError());
-
- utf16.reserve(len + 1);
- utf16.set_size(len);
-
- len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
- utf8.begin(), utf8.size(),
- utf16.begin(), utf16.size());
-
- if (len == 0)
- return windows_error(::GetLastError());
-
- // Make utf16 null terminated.
- utf16.push_back(0);
- utf16.pop_back();
-
- return error_code::success();
- }
-
- error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
- SmallVectorImpl<char> &utf8) {
- // Get length.
- int len = ::WideCharToMultiByte(CP_UTF8, 0,
- utf16, utf16_len,
- utf8.begin(), 0,
- NULL, NULL);
-
- if (len == 0)
- return windows_error(::GetLastError());
-
- utf8.reserve(len);
- utf8.set_size(len);
-
- // Now do the actual conversion.
- len = ::WideCharToMultiByte(CP_UTF8, 0,
- utf16, utf16_len,
- utf8.data(), utf8.size(),
- NULL, NULL);
-
- if (len == 0)
- return windows_error(::GetLastError());
-
- // Make utf8 null terminated.
- utf8.push_back(0);
- utf8.pop_back();
-
- return error_code::success();
- }
+ PtrCreateSymbolicLinkW create_symbolic_link_api =
+ PtrCreateSymbolicLinkW(::GetProcAddress(
+ ::GetModuleHandleW(L"Kernel32.dll"), "CreateSymbolicLinkW"));
error_code TempDir(SmallVectorImpl<wchar_t> &result) {
retry_temp_dir:
@@ -180,7 +128,7 @@ retry_random_path:
BYTE val = 0;
if (!::CryptGenRandom(CryptoProvider, 1, &val))
return windows_error(::GetLastError());
- random_path_utf16.push_back("0123456789abcdef"[val & 15]);
+ random_path_utf16.push_back(L"0123456789abcdef"[val & 15]);
}
else
random_path_utf16.push_back(*i);
@@ -268,9 +216,28 @@ namespace sys {
namespace fs {
std::string getMainExecutable(const char *argv0, void *MainExecAddr) {
- char pathname[MAX_PATH];
- DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
- return ret != MAX_PATH ? pathname : "";
+ SmallVector<wchar_t, MAX_PATH> PathName;
+ DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.capacity());
+
+ // A zero return value indicates a failure other than insufficient space.
+ if (Size == 0)
+ return "";
+
+ // Insufficient space is determined by a return value equal to the size of
+ // the buffer passed in.
+ if (Size == PathName.capacity())
+ return "";
+
+ // On success, GetModuleFileNameW returns the number of characters written to
+ // the buffer not including the NULL terminator.
+ PathName.set_size(Size);
+
+ // Convert the result from UTF-16 to UTF-8.
+ SmallVector<char, MAX_PATH> PathNameUTF8;
+ if (UTF16ToUTF8(PathName.data(), PathName.size(), PathNameUTF8))
+ return "";
+
+ return std::string(PathNameUTF8.data());
}
UniqueID file_status::getUniqueID() const {
@@ -293,47 +260,25 @@ TimeValue file_status::getLastModificationTime() const {
}
error_code current_path(SmallVectorImpl<char> &result) {
- SmallVector<wchar_t, 128> cur_path;
- cur_path.reserve(128);
-retry_cur_dir:
- DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
-
- // A zero return value indicates a failure other than insufficient space.
- if (len == 0)
- return windows_error(::GetLastError());
+ SmallVector<wchar_t, MAX_PATH> cur_path;
+ DWORD len = MAX_PATH;
- // If there's insufficient space, the len returned is larger than the len
- // given.
- if (len > cur_path.capacity()) {
+ do {
cur_path.reserve(len);
- goto retry_cur_dir;
- }
-
- cur_path.set_size(len);
- // cur_path now holds the current directory in utf-16. Convert to utf-8.
+ len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
- // Find out how much space we need. Sadly, this function doesn't return the
- // size needed unless you tell it the result size is 0, which means you
- // _always_ have to call it twice.
- len = ::WideCharToMultiByte(CP_UTF8, 0,
- cur_path.data(), cur_path.size(),
- result.data(), 0,
- NULL, NULL);
-
- if (len == 0)
- return make_error_code(windows_error(::GetLastError()));
+ // A zero return value indicates a failure other than insufficient space.
+ if (len == 0)
+ return windows_error(::GetLastError());
- result.reserve(len);
- result.set_size(len);
- // Now do the actual conversion.
- len = ::WideCharToMultiByte(CP_UTF8, 0,
- cur_path.data(), cur_path.size(),
- result.data(), result.size(),
- NULL, NULL);
- if (len == 0)
- return windows_error(::GetLastError());
+ // If there's insufficient space, the len returned is larger than the len
+ // given.
+ } while (len > cur_path.capacity());
- return error_code::success();
+ // On success, GetCurrentDirectoryW returns the number of characters not
+ // including the null-terminator.
+ cur_path.set_size(len);
+ return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result);
}
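The rewritten current_path() above is the grow-and-retry idiom this patch adopts for Win32 calls that report a required buffer size. A minimal sketch of the pattern, assuming a hypothetical WinAPILikeCall with GetCurrentDirectoryW-style semantics (0 on failure, the required size when the buffer is too small, the count of characters written otherwise):

    SmallVector<wchar_t, MAX_PATH> Buf;
    DWORD Len = MAX_PATH;
    do {
      Buf.reserve(Len);                                  // grow to at least Len
      Len = WinAPILikeCall(Buf.data(), Buf.capacity());  // hypothetical call
      if (Len == 0)
        return windows_error(::GetLastError());          // real failure
    } while (Len > Buf.capacity());                      // too small; Len is the size needed
    Buf.set_size(Len);                                   // Len excludes the null terminator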
error_code create_directory(const Twine &path, bool &existed) {
@@ -746,12 +691,11 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
case priv: flprotect = PAGE_WRITECOPY; break;
}
- FileMappingHandle = ::CreateFileMapping(FileHandle,
- 0,
- flprotect,
- Size >> 32,
- Size & 0xffffffff,
- 0);
+ FileMappingHandle =
+ ::CreateFileMappingW(FileHandle, 0, flprotect,
+ (Offset + Size) >> 32,
+ (Offset + Size) & 0xffffffff,
+ 0);
if (FileMappingHandle == NULL) {
error_code ec = windows_error(GetLastError());
if (FileDescriptor) {
@@ -816,7 +760,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
mapmode mode,
uint64_t length,
uint64_t offset,
- error_code &ec)
+ error_code &ec)
: Mode(mode)
, Size(length)
, Mapping()
@@ -1018,7 +962,7 @@ error_code detail::directory_iterator_increment(detail::DirIterState &it) {
return error_code::success();
}
-error_code map_file_pages(const Twine &path, off_t file_offset, size_t size,
+error_code map_file_pages(const Twine &path, off_t file_offset, size_t size,
bool map_writable, void *&result) {
assert(0 && "NOT IMPLEMENTED");
return windows_error::invalid_function;
@@ -1078,7 +1022,7 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD,
DWORD CreationDisposition;
if (Flags & F_Excl)
CreationDisposition = CREATE_NEW;
- else if (Flags & F_Append)
+ else if (Flags & F_Append)
CreationDisposition = OPEN_ALWAYS;
else
CreationDisposition = CREATE_ALWAYS;
@@ -1115,7 +1059,64 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD,
ResultFD = FD;
return error_code::success();
}
-
} // end namespace fs
+
+namespace windows {
+llvm::error_code UTF8ToUTF16(llvm::StringRef utf8,
+ llvm::SmallVectorImpl<wchar_t> &utf16) {
+ int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), 0);
+
+ if (len == 0)
+ return llvm::windows_error(::GetLastError());
+
+ utf16.reserve(len + 1);
+ utf16.set_size(len);
+
+ len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), utf16.size());
+
+ if (len == 0)
+ return llvm::windows_error(::GetLastError());
+
+ // Make utf16 null terminated.
+ utf16.push_back(0);
+ utf16.pop_back();
+
+ return llvm::error_code::success();
+}
+
+llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ llvm::SmallVectorImpl<char> &utf8) {
+ // Get length.
+ int len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.begin(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return llvm::windows_error(::GetLastError());
+
+ utf8.reserve(len);
+ utf8.set_size(len);
+
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.data(), utf8.size(),
+ NULL, NULL);
+
+ if (len == 0)
+ return llvm::windows_error(::GetLastError());
+
+ // Make utf8 null terminated.
+ utf8.push_back(0);
+ utf8.pop_back();
+
+ return llvm::error_code::success();
+}
+} // end namespace windows
} // end namespace sys
} // end namespace llvm
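With the conversion routines now exported from sys::windows (declared in Windows.h further down), the other Win32 support files can share one implementation instead of carrying private copies. A minimal usage sketch, assuming PathUTF8 is a UTF-8 StringRef destined for a wide-character API:

    llvm::SmallVector<wchar_t, 128> PathUTF16;
    if (llvm::error_code ec = llvm::sys::windows::UTF8ToUTF16(PathUTF8, PathUTF16))
      return ec;  // conversion failed; propagate the error
    // The helper leaves the buffer null-terminated (push_back(0)/pop_back()),
    // so data() can be handed straight to a *W API.
    HANDLE H = ::CreateFileW(PathUTF16.data(), GENERIC_READ, FILE_SHARE_READ,
                             NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);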
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 359b99f..f9a3db9 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -11,18 +11,25 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Allocator.h"
+
#include "Windows.h"
#include <direct.h>
#include <io.h>
#include <malloc.h>
#include <psapi.h>
+#include <shellapi.h>
#ifdef __MINGW32__
#if (HAVE_LIBPSAPI != 1)
#error "libpsapi.a should be present"
#endif
+ #if (HAVE_LIBSHELL32 != 1)
+ #error "libshell32.a should be present"
+ #endif
#else
#pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "shell32.lib")
#endif
//===----------------------------------------------------------------------===//
@@ -83,6 +90,8 @@ static unsigned getPageSize() {
// that LLVM ought to run as 64-bits on a 64-bit system, anyway.
SYSTEM_INFO info;
GetSystemInfo(&info);
+ // FIXME: FileOffset in MapViewOfFile() should be aligned not to dwPageSize,
+ // but to dwAllocationGranularity.
return static_cast<unsigned>(info.dwPageSize);
}
@@ -119,28 +128,89 @@ void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
sys_time = getTimeValueFromFILETIME(KernelTime);
}
-int Process::GetCurrentUserId()
-{
- return 65536;
-}
-
-int Process::GetCurrentGroupId()
-{
- return 65536;
-}
-
// Some LLVM programs such as bugpoint produce core files as a normal part of
-// their operation. To prevent the disk from filling up, this configuration item
-// does what's necessary to prevent their generation.
+// their operation. To prevent the disk from filling up, this configuration
+// item does what's necessary to prevent their generation.
void Process::PreventCoreFiles() {
- // Windows doesn't do core files, but it does do modal pop-up message
- // boxes. As this method is used by bugpoint, preventing these pop-ups
- // is the moral equivalent of suppressing core files.
+ // Windows does have the concept of core files, called minidumps. However,
+ // disabling minidumps for a particular application extends past the lifetime
+ // of that application, which is the incorrect behavior for this API.
+ // Additionally, the APIs require elevated privileges to disable and re-
+ // enable minidumps, which makes this untenable. For more information, see
+ // WerAddExcludedApplication and WerRemoveExcludedApplication (Vista and
+ // later).
+ //
+ // Windows also has modal pop-up message boxes. As this method is used by
+ // bugpoint, preventing these pop-ups is additionally important.
SetErrorMode(SEM_FAILCRITICALERRORS |
SEM_NOGPFAULTERRORBOX |
SEM_NOOPENFILEERRORBOX);
}
+/// Returns the environment variable \arg Name's value as a string encoded in
+/// UTF-8. \arg Name is assumed to be in UTF-8 encoding.
+Optional<std::string> Process::GetEnv(StringRef Name) {
+ // Convert the argument to UTF-16 to pass it to _wgetenv().
+ SmallVector<wchar_t, 128> NameUTF16;
+ if (error_code ec = windows::UTF8ToUTF16(Name, NameUTF16))
+ return None;
+
+ // An environment variable may be stored in a non-UTF-8 encoding, and there's
+ // no way to know what that encoding is. The only reliable way to look up a
+ // multibyte environment variable is to use GetEnvironmentVariableW().
+ SmallVector<wchar_t, MAX_PATH> Buf;
+ size_t Size = MAX_PATH;
+ do {
+ Buf.reserve(Size);
+ Size =
+ GetEnvironmentVariableW(NameUTF16.data(), Buf.data(), Buf.capacity());
+ if (Size == 0)
+ return None;
+
+ // Try again with larger buffer.
+ } while (Size > Buf.capacity());
+ Buf.set_size(Size);
+
+ // Convert the result from UTF-16 to UTF-8.
+ SmallVector<char, MAX_PATH> Res;
+ if (error_code ec = windows::UTF16ToUTF8(Buf.data(), Size, Res))
+ return None;
+ return std::string(Res.data());
+}
+
+error_code
+Process::GetArgumentVector(SmallVectorImpl<const char *> &Args,
+ ArrayRef<const char *>,
+ SpecificBumpPtrAllocator<char> &ArgAllocator) {
+ int NewArgCount;
+ error_code ec;
+
+ wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(),
+ &NewArgCount);
+ if (!UnicodeCommandLine)
+ return windows_error(::GetLastError());
+
+ Args.reserve(NewArgCount);
+
+ for (int i = 0; i < NewArgCount; ++i) {
+ SmallVector<char, MAX_PATH> NewArgString;
+ ec = windows::UTF16ToUTF8(UnicodeCommandLine[i],
+ wcslen(UnicodeCommandLine[i]),
+ NewArgString);
+ if (ec)
+ break;
+
+ char *Buffer = ArgAllocator.Allocate(NewArgString.size() + 1);
+ ::memcpy(Buffer, NewArgString.data(), NewArgString.size() + 1);
+ Args.push_back(Buffer);
+ }
+ LocalFree(UnicodeCommandLine);
+ if (ec)
+ return ec;
+
+ return error_code::success();
+}
+
bool Process::StandardInIsUserInput() {
return FileDescriptorIsDisplayed(0);
}
@@ -187,6 +257,11 @@ bool Process::StandardErrHasColors() {
return FileDescriptorHasColors(2);
}
+static bool UseANSI = false;
+void Process::UseANSIEscapeCodes(bool enable) {
+ UseANSI = enable;
+}
+
namespace {
class DefaultColors
{
@@ -208,10 +283,12 @@ DefaultColors defaultColors;
}
bool Process::ColorNeedsFlush() {
- return true;
+ return !UseANSI;
}
const char *Process::OutputBold(bool bg) {
+ if (UseANSI) return "\033[1m";
+
WORD colors = DefaultColors::GetCurrentColor();
if (bg)
colors |= BACKGROUND_INTENSITY;
@@ -222,6 +299,8 @@ const char *Process::OutputBold(bool bg) {
}
const char *Process::OutputColor(char code, bool bold, bool bg) {
+ if (UseANSI) return colorcodes[bg?1:0][bold?1:0][code&7];
+
WORD colors;
if (bg) {
colors = ((code&1) ? BACKGROUND_RED : 0) |
@@ -247,6 +326,8 @@ static WORD GetConsoleTextAttribute(HANDLE hConsoleOutput) {
}
const char *Process::OutputReverse() {
+ if (UseANSI) return "\033[7m";
+
const WORD attributes
= GetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE));
@@ -273,6 +354,7 @@ const char *Process::OutputReverse() {
}
const char *Process::ResetColor() {
+ if (UseANSI) return "\033[0m";
SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
return 0;
}
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 8165ef4..dc09738 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -24,16 +24,11 @@
//=== and must not be UNIX code
//===----------------------------------------------------------------------===//
-namespace {
- struct Win32ProcessInfo {
- HANDLE hProcess;
- DWORD dwProcessId;
- };
-}
-
namespace llvm {
using namespace sys;
+ProcessInfo::ProcessInfo() : ProcessHandle(0), Pid(0), ReturnCode(0) {}
+
// This function just uses the PATH environment variable to find the program.
std::string sys::FindProgramByName(const std::string &progName) {
// Check some degenerate cases
@@ -47,42 +42,39 @@ std::string sys::FindProgramByName(const std::string &progName) {
// At this point, the file name is valid and does not contain slashes.
// Let Windows search for it.
- std::string buffer;
- buffer.resize(MAX_PATH);
- char *dummy = NULL;
- DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH,
- &buffer[0], &dummy);
-
- // See if it wasn't found.
- if (len == 0)
+ SmallVector<wchar_t, MAX_PATH> progNameUnicode;
+ if (windows::UTF8ToUTF16(progName, progNameUnicode))
return "";
- // See if we got the entire path.
- if (len < MAX_PATH)
- return buffer;
+ SmallVector<wchar_t, MAX_PATH> buffer;
+ DWORD len = MAX_PATH;
+ do {
+ buffer.reserve(len);
+ len = ::SearchPathW(NULL, progNameUnicode.data(), L".exe",
+ buffer.capacity(), buffer.data(), NULL);
- // Buffer was too small; grow and retry.
- while (true) {
- buffer.resize(len+1);
- DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, &buffer[0], &dummy);
-
- // It is unlikely the search failed, but it's always possible some file
- // was added or removed since the last search, so be paranoid...
- if (len2 == 0)
+ // See if it wasn't found.
+ if (len == 0)
return "";
- else if (len2 <= len)
- return buffer;
- len = len2;
- }
+ // Buffer was too small; grow and retry.
+ } while (len > buffer.capacity());
+
+ buffer.set_size(len);
+ SmallVector<char, MAX_PATH> result;
+ if (windows::UTF16ToUTF8(buffer.begin(), buffer.size(), result))
+ return "";
+
+ return std::string(result.data(), result.size());
}
static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) {
HANDLE h;
if (path == 0) {
- DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
- GetCurrentProcess(), &h,
- 0, TRUE, DUPLICATE_SAME_ACCESS);
+ if (!DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd),
+ GetCurrentProcess(), &h,
+ 0, TRUE, DUPLICATE_SAME_ACCESS))
+ return INVALID_HANDLE_VALUE;
return h;
}
@@ -97,9 +89,13 @@ static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) {
sa.lpSecurityDescriptor = 0;
sa.bInheritHandle = TRUE;
- h = CreateFile(fname.c_str(), fd ? GENERIC_WRITE : GENERIC_READ,
- FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
- FILE_ATTRIBUTE_NORMAL, NULL);
+ SmallVector<wchar_t, 128> fnameUnicode;
+ if (windows::UTF8ToUTF16(fname, fnameUnicode))
+ return INVALID_HANDLE_VALUE;
+
+ h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ,
+ FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL, NULL);
if (h == INVALID_HANDLE_VALUE) {
MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " +
(fd ? "input: " : "output: "));
@@ -171,13 +167,9 @@ static unsigned int ArgLenWithQuotes(const char *Str) {
}
-static bool Execute(void **Data,
- StringRef Program,
- const char** args,
- const char** envp,
- const StringRef** redirects,
- unsigned memoryLimit,
- std::string* ErrMsg) {
+static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
+ const char **envp, const StringRef **redirects,
+ unsigned memoryLimit, std::string *ErrMsg) {
if (!sys::fs::can_execute(Program)) {
if (ErrMsg)
*ErrMsg = "program not executable";
@@ -227,34 +219,28 @@ static bool Execute(void **Data,
*p = 0;
// The pointer to the environment block for the new process.
- OwningArrayPtr<char> envblock;
+ std::vector<wchar_t> EnvBlock;
if (envp) {
// An environment block consists of a null-terminated block of
// null-terminated strings. Convert the array of environment variables to
// an environment block by concatenating them.
+ for (unsigned i = 0; envp[i]; ++i) {
+ SmallVector<wchar_t, MAX_PATH> EnvString;
+ if (error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) {
+ SetLastError(ec.value());
+ MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16");
+ return false;
+ }
- // First, determine the length of the environment block.
- len = 0;
- for (unsigned i = 0; envp[i]; i++)
- len += strlen(envp[i]) + 1;
-
- // Now build the environment block.
- envblock.reset(new char[len+1]);
- p = envblock.get();
-
- for (unsigned i = 0; envp[i]; i++) {
- const char *ev = envp[i];
- size_t len = strlen(ev) + 1;
- memcpy(p, ev, len);
- p += len;
+ EnvBlock.insert(EnvBlock.end(), EnvString.begin(), EnvString.end());
+ EnvBlock.push_back(0);
}
-
- *p = 0;
+ EnvBlock.push_back(0);
}
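For reference, the block handed to CreateProcessW is a run of null-terminated UTF-16 strings followed by one extra terminating null, which is exactly what the loop above builds. A small self-contained illustration with hypothetical variables:

    std::vector<wchar_t> EnvBlock;
    const wchar_t *Vars[] = { L"PATH=C:\\bin", L"LANG=C" };
    for (unsigned i = 0; i < 2; ++i) {
      EnvBlock.insert(EnvBlock.end(), Vars[i], Vars[i] + wcslen(Vars[i]));
      EnvBlock.push_back(0);   // terminate this variable
    }
    EnvBlock.push_back(0);     // terminate the whole block
    // Layout: P A T H = C : \ b i n \0 L A N G = C \0 \0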
// Create a child process.
- STARTUPINFO si;
+ STARTUPINFOW si;
memset(&si, 0, sizeof(si));
si.cb = sizeof(si);
si.hStdInput = INVALID_HANDLE_VALUE;
@@ -278,9 +264,14 @@ static bool Execute(void **Data,
if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) {
// If stdout and stderr should go to the same place, redirect stderr
// to the handle already open for stdout.
- DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
- GetCurrentProcess(), &si.hStdError,
- 0, TRUE, DUPLICATE_SAME_ACCESS);
+ if (!DuplicateHandle(GetCurrentProcess(), si.hStdOutput,
+ GetCurrentProcess(), &si.hStdError,
+ 0, TRUE, DUPLICATE_SAME_ACCESS)) {
+ CloseHandle(si.hStdInput);
+ CloseHandle(si.hStdOutput);
+ MakeErrMsg(ErrMsg, "can't dup stderr to stdout");
+ return false;
+ }
} else {
// Just redirect stderr
si.hStdError = RedirectIO(redirects[2], 2, ErrMsg);
@@ -298,9 +289,27 @@ static bool Execute(void **Data,
fflush(stdout);
fflush(stderr);
- std::string ProgramStr = Program;
- BOOL rc = CreateProcess(ProgramStr.c_str(), command.get(), NULL, NULL, TRUE,
- 0, envblock.get(), NULL, &si, &pi);
+
+ SmallVector<wchar_t, MAX_PATH> ProgramUtf16;
+ if (error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) {
+ SetLastError(ec.value());
+ MakeErrMsg(ErrMsg,
+ std::string("Unable to convert application name to UTF-16"));
+ return false;
+ }
+
+ SmallVector<wchar_t, MAX_PATH> CommandUtf16;
+ if (error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) {
+ SetLastError(ec.value());
+ MakeErrMsg(ErrMsg,
+ std::string("Unable to convert command-line to UTF-16"));
+ return false;
+ }
+
+ BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0,
+ TRUE, CREATE_UNICODE_ENVIRONMENT,
+ EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si,
+ &pi);
DWORD err = GetLastError();
// Regardless of whether the process got created or not, we are done with
@@ -313,15 +322,12 @@ static bool Execute(void **Data,
if (!rc) {
SetLastError(err);
MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") +
- ProgramStr + "'");
+ Program.str() + "'");
return false;
}
- if (Data) {
- Win32ProcessInfo* wpi = new Win32ProcessInfo;
- wpi->hProcess = pi.hProcess;
- wpi->dwProcessId = pi.dwProcessId;
- *Data = wpi;
- }
+
+ PI.Pid = pi.dwProcessId;
+ PI.ProcessHandle = pi.hProcess;
// Make sure these get closed no matter what.
ScopedCommonHandle hThread(pi.hThread);
@@ -329,7 +335,7 @@ static bool Execute(void **Data,
// Assign the process to a job if a memory limit is defined.
ScopedJobHandle hJob;
if (memoryLimit != 0) {
- hJob = CreateJobObject(0, 0);
+ hJob = CreateJobObjectW(0, 0);
bool success = false;
if (hJob) {
JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
@@ -351,68 +357,72 @@ static bool Execute(void **Data,
}
}
- // Don't leak the handle if the caller doesn't want it.
- if (!Data)
- CloseHandle(pi.hProcess);
-
return true;
}
-static int WaitAux(Win32ProcessInfo *wpi, unsigned secondsToWait,
- std::string *ErrMsg) {
- // Wait for the process to terminate.
- HANDLE hProcess = wpi->hProcess;
- DWORD millisecondsToWait = INFINITE;
- if (secondsToWait > 0)
- millisecondsToWait = secondsToWait * 1000;
-
- if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) {
- if (!TerminateProcess(hProcess, 1)) {
- MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
- // -2 indicates a crash or timeout as opposed to failure to execute.
- return -2;
+namespace llvm {
+ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
+ bool WaitUntilChildTerminates, std::string *ErrMsg) {
+ assert(PI.Pid && "invalid pid to wait on, process not started?");
+ assert(PI.ProcessHandle &&
+ "invalid process handle to wait on, process not started?");
+ DWORD milliSecondsToWait = 0;
+ if (WaitUntilChildTerminates)
+ milliSecondsToWait = INFINITE;
+ else if (SecondsToWait > 0)
+ milliSecondsToWait = SecondsToWait * 1000;
+
+ ProcessInfo WaitResult = PI;
+ DWORD WaitStatus = WaitForSingleObject(PI.ProcessHandle, milliSecondsToWait);
+ if (WaitStatus == WAIT_TIMEOUT) {
+ if (SecondsToWait) {
+ if (!TerminateProcess(PI.ProcessHandle, 1)) {
+ if (ErrMsg)
+ MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
+
+ // -2 indicates a crash or timeout as opposed to failure to execute.
+ WaitResult.ReturnCode = -2;
+ CloseHandle(PI.ProcessHandle);
+ return WaitResult;
+ }
+ WaitForSingleObject(PI.ProcessHandle, INFINITE);
+ CloseHandle(PI.ProcessHandle);
+ } else {
+ // Non-blocking wait.
+ return ProcessInfo();
}
- WaitForSingleObject(hProcess, INFINITE);
}
// Get its exit status.
DWORD status;
- BOOL rc = GetExitCodeProcess(hProcess, &status);
+ BOOL rc = GetExitCodeProcess(PI.ProcessHandle, &status);
DWORD err = GetLastError();
+ CloseHandle(PI.ProcessHandle);
if (!rc) {
SetLastError(err);
- MakeErrMsg(ErrMsg, "Failed getting status for program.");
+ if (ErrMsg)
+ MakeErrMsg(ErrMsg, "Failed getting status for program.");
+
// -2 indicates a crash or timeout as opposed to failure to execute.
- return -2;
+ WaitResult.ReturnCode = -2;
+ return WaitResult;
}
if (!status)
- return 0;
+ return WaitResult;
// Pass 10(Warning) and 11(Error) to the callee as negative value.
if ((status & 0xBFFF0000U) == 0x80000000U)
- return (int)status;
-
- if (status & 0xFF)
- return status & 0x7FFFFFFF;
-
- return 1;
-}
-
-static int Wait(void *&Data, StringRef Program, unsigned secondsToWait,
- std::string *ErrMsg) {
- Win32ProcessInfo *wpi = reinterpret_cast<Win32ProcessInfo *>(Data);
- int Ret = WaitAux(wpi, secondsToWait, ErrMsg);
-
- CloseHandle(wpi->hProcess);
- delete wpi;
- Data = 0;
+ WaitResult.ReturnCode = static_cast<int>(status);
+ else if (status & 0xFF)
+ WaitResult.ReturnCode = status & 0x7FFFFFFF;
+ else
+ WaitResult.ReturnCode = 1;
- return Ret;
+ return WaitResult;
}
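A note on the new convention: callers get back a ProcessInfo whose ReturnCode is 0 on success, the (masked) exit status on ordinary failure, and -2 when the child crashed or had to be killed after a timeout. A hedged usage sketch, assuming PI was filled in by a successful Execute call:

    std::string Err;
    ProcessInfo Result = sys::Wait(PI, /*SecondsToWait=*/0,
                                   /*WaitUntilChildTerminates=*/true, &Err);
    if (Result.ReturnCode == -2)
      ;  // the child crashed, or its status could not be retrieved
    else if (Result.ReturnCode != 0)
      ;  // the child exited with a failure code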
-namespace llvm {
error_code sys::ChangeStdinToBinary(){
int result = _setmode( _fileno(stdin), _O_BINARY );
if (result == -1)
@@ -449,5 +459,4 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
}
return true;
}
-
}
diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc
index 9593923..c431844 100644
--- a/lib/Support/Windows/RWMutex.inc
+++ b/lib/Support/Windows/RWMutex.inc
@@ -48,8 +48,7 @@ static bool loadSRW() {
if (!sChecked) {
sChecked = true;
- HMODULE hLib = ::LoadLibrary(TEXT("Kernel32"));
- if (hLib) {
+ if (HMODULE hLib = ::GetModuleHandleW(L"Kernel32.dll")) {
fpInitializeSRWLock =
(VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
"InitializeSRWLock");
@@ -65,7 +64,6 @@ static bool loadSRW() {
fpReleaseSRWLockShared =
(VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib,
"ReleaseSRWLockShared");
- ::FreeLibrary(hLib);
if (fpInitializeSRWLock != NULL) {
sHasSRW = true;
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index bce83b9..4b40d51 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -135,7 +135,7 @@ typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64);
static fpSymFunctionTableAccess64 SymFunctionTableAccess64;
static bool load64BitDebugHelp(void) {
- HMODULE hLib = ::LoadLibrary("Dbghelp.dll");
+ HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll");
if (hLib) {
StackWalk64 = (fpStackWalk64)
::GetProcAddress(hLib, "StackWalk64");
diff --git a/lib/Support/Windows/TimeValue.inc b/lib/Support/Windows/TimeValue.inc
index 96f5579..98b07d6 100644
--- a/lib/Support/Windows/TimeValue.inc
+++ b/lib/Support/Windows/TimeValue.inc
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#include "Windows.h"
+#include <cctype>
#include <time.h>
-namespace llvm {
-using namespace sys;
+using namespace llvm;
+using namespace llvm::sys;
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only Win32 specific code.
@@ -49,13 +50,10 @@ std::string TimeValue::str() const {
char Buffer[25];
// FIXME: the windows version of strftime doesn't support %e
strftime(Buffer, 25, "%b %d %H:%M %Y", LT);
- assert((Buffer[3] == ' ' && isdigit(Buffer[5]) && Buffer[6] == ' ') ||
+ assert((Buffer[3] == ' ' && isdigit(Buffer[5]) && Buffer[6] == ' ') &&
"Unexpected format in strftime()!");
// Emulate %e on %d to mute '0'.
if (Buffer[4] == '0')
Buffer[4] = ' ';
return std::string(Buffer);
}
-
-
-}
diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h
index 4cdac78..1f3417d 100644
--- a/lib/Support/Windows/Windows.h
+++ b/lib/Support/Windows/Windows.h
@@ -24,23 +24,31 @@
#define _WIN32_IE 0x0600 // MinGW at it again.
#define WIN32_LEAN_AND_MEAN
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h" // Get build system configuration settings
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/system_error.h"
#include <windows.h>
#include <wincrypt.h>
-#include <shlobj.h>
#include <cassert>
#include <string>
+#include <vector>
inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
if (!ErrMsg)
return true;
char *buffer = NULL;
- FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
- NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
- *ErrMsg = prefix + buffer;
+ DWORD R = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+ if (R)
+ *ErrMsg = prefix + buffer;
+ else
+ *ErrMsg = prefix + "Unknown error";
+
LocalFree(buffer);
- return true;
+ return R != 0;
}
template <typename HandleTraits>
@@ -148,4 +156,13 @@ c_str(SmallVectorImpl<T> &str) {
str.pop_back();
return str.data();
}
+
+namespace sys {
+namespace windows {
+error_code UTF8ToUTF16(StringRef utf8,
+ SmallVectorImpl<wchar_t> &utf16);
+error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ SmallVectorImpl<char> &utf8);
+} // end namespace windows
+} // end namespace sys
} // end namespace llvm.
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 213f5e1..9495cd4 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -96,6 +96,15 @@ static EncodingInfo getUnicodeEncoding(StringRef Input) {
namespace llvm {
namespace yaml {
+/// Pin the vtables to this file.
+void Node::anchor() {}
+void NullNode::anchor() {}
+void ScalarNode::anchor() {}
+void KeyValueNode::anchor() {}
+void MappingNode::anchor() {}
+void SequenceNode::anchor() {}
+void AliasNode::anchor() {}
+
/// Token - A single YAML token.
struct Token : ilist_node<Token> {
enum TokenKind {
@@ -1070,14 +1079,22 @@ bool Scanner::scanDirective() {
Current = skip_while(&Scanner::skip_ns_char, Current);
StringRef Name(NameStart, Current - NameStart);
Current = skip_while(&Scanner::skip_s_white, Current);
-
+
+ Token T;
if (Name == "YAML") {
Current = skip_while(&Scanner::skip_ns_char, Current);
- Token T;
T.Kind = Token::TK_VersionDirective;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
return true;
+ } else if(Name == "TAG") {
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ Current = skip_while(&Scanner::skip_s_white, Current);
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ T.Kind = Token::TK_TagDirective;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
}
return false;
}
@@ -1564,10 +1581,6 @@ void Stream::printError(Node *N, const Twine &Msg) {
, Ranges);
}
-void Stream::handleYAMLDirective(const Token &t) {
- // TODO: Ensure version is 1.x.
-}
-
document_iterator Stream::begin() {
if (CurrentDoc)
report_fatal_error("Can only iterate over the stream once");
@@ -1588,14 +1601,59 @@ void Stream::skip() {
i->skip();
}
-Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
+Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A, StringRef T)
: Doc(D)
, TypeID(Type)
- , Anchor(A) {
+ , Anchor(A)
+ , Tag(T) {
SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
SourceRange = SMRange(Start, Start);
}
+std::string Node::getVerbatimTag() const {
+ StringRef Raw = getRawTag();
+ if (!Raw.empty() && Raw != "!") {
+ std::string Ret;
+ if (Raw.find_last_of('!') == 0) {
+ Ret = Doc->getTagMap().find("!")->second;
+ Ret += Raw.substr(1);
+ return llvm_move(Ret);
+ } else if (Raw.startswith("!!")) {
+ Ret = Doc->getTagMap().find("!!")->second;
+ Ret += Raw.substr(2);
+ return llvm_move(Ret);
+ } else {
+ StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
+ std::map<StringRef, StringRef>::const_iterator It =
+ Doc->getTagMap().find(TagHandle);
+ if (It != Doc->getTagMap().end())
+ Ret = It->second;
+ else {
+ Token T;
+ T.Kind = Token::TK_Tag;
+ T.Range = TagHandle;
+ setError(Twine("Unknown tag handle ") + TagHandle, T);
+ }
+ Ret += Raw.substr(Raw.find_last_of('!') + 1);
+ return llvm_move(Ret);
+ }
+ }
+
+ switch (getType()) {
+ case NK_Null:
+ return "tag:yaml.org,2002:null";
+ case NK_Scalar:
+ // TODO: Tag resolution.
+ return "tag:yaml.org,2002:str";
+ case NK_Mapping:
+ return "tag:yaml.org,2002:map";
+ case NK_Sequence:
+ return "tag:yaml.org,2002:seq";
+ }
+
+ return "";
+}
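For the built-in handles this means, for example, that a node written as !!str foo resolves via the default "!!" mapping to the verbatim tag tag:yaml.org,2002:str, while a completely untagged node falls through to the type-based defaults in the switch above (null, str, map or seq).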
+
Token &Node::peekNext() {
return Doc->peekNext();
}
@@ -1999,6 +2057,10 @@ void SequenceNode::increment() {
}
Document::Document(Stream &S) : stream(S), Root(0) {
+ // The tag map starts with two default mappings.
+ TagMap["!"] = "!";
+ TagMap["!!"] = "tag:yaml.org,2002:";
+
if (parseDirectives())
expectToken(Token::TK_DocumentStart);
Token &T = peekNext();
@@ -2042,6 +2104,7 @@ Node *Document::parseBlockNode() {
Token T = peekNext();
// Handle properties.
Token AnchorInfo;
+ Token TagInfo;
parse_property:
switch (T.Kind) {
case Token::TK_Alias:
@@ -2056,7 +2119,11 @@ parse_property:
T = peekNext();
goto parse_property;
case Token::TK_Tag:
- getNext(); // Skip TK_Tag.
+ if (TagInfo.Kind == Token::TK_Tag) {
+ setError("Already encountered a tag for this node!", T);
+ return 0;
+ }
+ TagInfo = getNext(); // Consume TK_Tag.
T = peekNext();
goto parse_property;
default:
@@ -2070,42 +2137,49 @@ parse_property:
// Don't eat the TK_BlockEntry, SequenceNode needs it.
return new (NodeAllocator) SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Indentless);
case Token::TK_BlockSequenceStart:
getNext();
return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Block);
case Token::TK_BlockMappingStart:
getNext();
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Block);
case Token::TK_FlowSequenceStart:
getNext();
return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Flow);
case Token::TK_FlowMappingStart:
getNext();
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Flow);
case Token::TK_Scalar:
getNext();
return new (NodeAllocator)
ScalarNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, T.Range);
case Token::TK_Key:
// Don't eat the TK_Key, KeyValueNode expects it.
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Inline);
case Token::TK_DocumentStart:
case Token::TK_DocumentEnd:
@@ -2126,10 +2200,10 @@ bool Document::parseDirectives() {
while (true) {
Token T = peekNext();
if (T.Kind == Token::TK_TagDirective) {
- handleTagDirective(getNext());
+ parseTAGDirective();
isDirective = true;
} else if (T.Kind == Token::TK_VersionDirective) {
- stream.handleYAMLDirective(getNext());
+ parseYAMLDirective();
isDirective = true;
} else
break;
@@ -2137,6 +2211,21 @@ bool Document::parseDirectives() {
return isDirective;
}
+void Document::parseYAMLDirective() {
+ getNext(); // Eat %YAML <version>
+}
+
+void Document::parseTAGDirective() {
+ Token Tag = getNext(); // %TAG <handle> <prefix>
+ StringRef T = Tag.Range;
+ // Strip %TAG
+ T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
+ std::size_t HandleEnd = T.find_first_of(" \t");
+ StringRef TagHandle = T.substr(0, HandleEnd);
+ StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
+ TagMap[TagHandle] = TagPrefix;
+}
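Together with getVerbatimTag() above, a %TAG directive now lets shorthand handles expand to full tag URIs. A minimal sketch of how this could be exercised (a hypothetical test snippet using the llvm/Support/YAMLParser.h API, not part of the patch):

    SourceMgr SM;
    yaml::Stream YS("%TAG !e! tag:example.com,2014:\n"
                    "---\n"
                    "!e!widget {size: 3}\n", SM);
    for (yaml::document_iterator DI = YS.begin(), DE = YS.end(); DI != DE; ++DI)
      if (yaml::Node *Root = DI->getRoot())
        // Expected to print "tag:example.com,2014:widget": the "!e!" handle is
        // looked up in the document's tag map and prepended to the suffix.
        outs() << Root->getVerbatimTag() << "\n";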
+
bool Document::expectToken(int TK) {
Token T = getNext();
if (T.Kind != TK) {
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index b0cd415..42bff96 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
+#include <cctype>
using namespace llvm;
using namespace yaml;
@@ -40,32 +41,43 @@ void IO::setContext(void *Context) {
// Input
//===----------------------------------------------------------------------===//
-Input::Input(StringRef InputContent, void *Ctxt)
- : IO(Ctxt),
+Input::Input(StringRef InputContent,
+ void *Ctxt,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ void *DiagHandlerCtxt)
+ : IO(Ctxt),
Strm(new Stream(InputContent, SrcMgr)),
CurrentNode(NULL) {
+ if (DiagHandler)
+ SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt);
DocIterator = Strm->begin();
}
Input::~Input() {
-
}
error_code Input::error() {
return EC;
}
-void Input::setDiagHandler(SourceMgr::DiagHandlerTy Handler, void *Ctxt) {
- SrcMgr.setDiagHandler(Handler, Ctxt);
-}
+// Pin the vtables to this file.
+void Input::HNode::anchor() {}
+void Input::EmptyHNode::anchor() {}
+void Input::ScalarHNode::anchor() {}
-bool Input::outputting() {
+bool Input::outputting() const {
return false;
}
bool Input::setCurrentDocument() {
if (DocIterator != Strm->end()) {
Node *N = DocIterator->getRoot();
+ if (!N) {
+ assert(Strm->failed() && "Root is NULL iff parsing failed");
+ EC = make_error_code(errc::invalid_argument);
+ return false;
+ }
+
if (isa<NullNode>(N)) {
// Empty files are allowed and ignored
++DocIterator;
@@ -82,10 +94,21 @@ void Input::nextDocument() {
++DocIterator;
}
+bool Input::mapTag(StringRef Tag, bool Default) {
+ std::string foundTag = CurrentNode->_node->getVerbatimTag();
+ if (foundTag.empty()) {
+ // If no tag found and 'Tag' is the default, say it was found.
+ return Default;
+ }
+ // Return true iff found tag matches supplied tag.
+ return Tag.equals(foundTag);
+}
+
void Input::beginMapping() {
if (EC)
return;
- MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
+ // CurrentNode can be null if the document is empty.
+ MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode);
if (MN) {
MN->ValidKeys.clear();
}
@@ -96,6 +119,15 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
UseDefault = false;
if (EC)
return false;
+
+ // CurrentNode is null for empty documents, which is an error if required
+ // nodes are present.
+ if (!CurrentNode) {
+ if (Required)
+ EC = make_error_code(errc::invalid_argument);
+ return false;
+ }
+
MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
if (!MN) {
setError(CurrentNode, "not a mapping");
@@ -122,7 +154,8 @@ void Input::postflightKey(void *saveInfo) {
void Input::endMapping() {
if (EC)
return;
- MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
+ // CurrentNode can be null if the document is empty.
+ MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode);
if (!MN)
return;
for (MapHNode::NameToNode::iterator i = MN->Mapping.begin(),
@@ -263,6 +296,7 @@ void Input::scalarString(StringRef &S) {
}
void Input::setError(HNode *hnode, const Twine &message) {
+ assert(hnode && "HNode must not be NULL");
this->setError(hnode->_node, message);
}
@@ -334,6 +368,10 @@ void Input::setError(const Twine &Message) {
this->setError(CurrentNode, Message);
}
+bool Input::canElideEmptySequence() {
+ return false;
+}
+
Input::MapHNode::~MapHNode() {
for (MapHNode::NameToNode::iterator i = Mapping.begin(), End = Mapping.end();
i != End; ++i) {
@@ -368,7 +406,7 @@ Output::Output(raw_ostream &yout, void *context)
Output::~Output() {
}
-bool Output::outputting() {
+bool Output::outputting() const {
return true;
}
@@ -377,6 +415,14 @@ void Output::beginMapping() {
NeedsNewLine = true;
}
+bool Output::mapTag(StringRef Tag, bool Use) {
+ if (Use) {
+ this->output(" ");
+ this->output(Tag);
+ }
+ return Use;
+}
+
void Output::endMapping() {
StateStack.pop_back();
}
@@ -505,9 +551,20 @@ void Output::endBitSetScalar() {
}
void Output::scalarString(StringRef &S) {
+ const char ScalarSafeChars[] = "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/^., \t";
+
this->newLineCheck();
- if (S.find('\n') == StringRef::npos) {
- // No embedded new-line chars, just print string.
+ if (S.empty()) {
+ // Print '' for the empty string because leaving the field empty is not
+ // allowed.
+ this->outputUpToEndOfLine("''");
+ return;
+ }
+ if (S.find_first_not_of(ScalarSafeChars) == StringRef::npos &&
+ !isspace(S.front()) && !isspace(S.back())) {
+ // If the string consists only of safe characters, print it out without
+ // quotes.
this->outputUpToEndOfLine(S);
return;
}
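In practice this means a value such as release/3.4 or x86_64-pc-win32 is still emitted bare, while an empty string becomes '', and anything containing characters outside the safe set (a colon, say) or with leading or trailing whitespace keeps taking the existing quoting path below.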
@@ -532,6 +589,19 @@ void Output::scalarString(StringRef &S) {
void Output::setError(const Twine &message) {
}
+bool Output::canElideEmptySequence() {
+ // Normally, an optional key/value whose value is an empty sequence can be
+ // left out entirely. But that produces invalid YAML if the key/value is the
+ // only entry in a map and that map is itself used inside a sequence. This
+ // detects whether this sequence is the first key/value in a map that is
+ // itself embedded in a sequence.
+ if (StateStack.size() < 2)
+ return true;
+ if (StateStack.back() != inMapFirstKey)
+ return true;
+ return (StateStack[StateStack.size()-2] != inSeq);
+}
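As an illustration: for a map with keys name and ports where ports is an empty sequence, the ports entry can simply be dropped at document level, but when that same map is an element of a sequence and ports is its only key, dropping it would leave a bare "-" with nothing after it, so the empty sequence must still be printed.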
+
void Output::output(StringRef s) {
Column += s.size();
Out << s;
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 92fa8b5..cb96489 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -447,7 +447,8 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
error_code EC = sys::fs::openFileForWrite(Filename, FD, Flags);
if (EC) {
- ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
+ ErrorInfo = "Error opening output file '" + std::string(Filename) + "': " +
+ EC.message();
ShouldClose = false;
return;
}
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 9ad2053..431f4aa 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -557,9 +557,23 @@ Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const {
return const_cast<BitsInit *>(this);
}
+namespace {
+ template<typename T>
+ class Pool : public T {
+ public:
+ ~Pool();
+ };
+ template<typename T>
+ Pool<T>::~Pool() {
+ for (typename T::iterator I = this->begin(), E = this->end(); I != E; ++I) {
+ typename T::value_type &Item = *I;
+ delete Item.second;
+ }
+ }
+}
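The Pool wrapper changes nothing about how the maps are used; it only adds a destructor that deletes the uniqued objects when the static map itself is torn down at shutdown, so the interned Inits no longer register as leaks. Usage stays the same, e.g. (illustrative only):

    IntInit *A = IntInit::get(42);
    IntInit *B = IntInit::get(42);
    assert(A == B && "equal values are uniqued to the same object");
    // Both point at one heap allocation, freed once by Pool's destructor.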
+
IntInit *IntInit::get(int64_t V) {
- typedef DenseMap<int64_t, IntInit *> Pool;
- static Pool ThePool;
+ static Pool<DenseMap<int64_t, IntInit *> > ThePool;
IntInit *&I = ThePool[V];
if (!I) I = new IntInit(V);
@@ -586,8 +600,7 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
void StringInit::anchor() { }
StringInit *StringInit::get(StringRef V) {
- typedef StringMap<StringInit *> Pool;
- static Pool ThePool;
+ static Pool<StringMap<StringInit *> > ThePool;
StringInit *&I = ThePool[V];
if (!I) I = new StringInit(V);
@@ -726,9 +739,7 @@ Init *OpInit::getBit(unsigned Bit) const {
UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) {
typedef std::pair<std::pair<unsigned, Init *>, RecTy *> Key;
-
- typedef DenseMap<Key, UnOpInit *> Pool;
- static Pool ThePool;
+ static Pool<DenseMap<Key, UnOpInit *> > ThePool;
Key TheKey(std::make_pair(std::make_pair(opc, lhs), Type));
@@ -873,8 +884,7 @@ BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs,
RecTy *
> Key;
- typedef DenseMap<Key, BinOpInit *> Pool;
- static Pool ThePool;
+ static Pool<DenseMap<Key, BinOpInit *> > ThePool;
Key TheKey(std::make_pair(std::make_pair(std::make_pair(opc, lhs), rhs),
Type));
@@ -1298,8 +1308,7 @@ VarInit *VarInit::get(const std::string &VN, RecTy *T) {
VarInit *VarInit::get(Init *VN, RecTy *T) {
typedef std::pair<RecTy *, Init *> Key;
- typedef DenseMap<Key, VarInit *> Pool;
- static Pool ThePool;
+ static Pool<DenseMap<Key, VarInit *> > ThePool;
Key TheKey(std::make_pair(T, VN));
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 965cd00..daac574 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -2496,6 +2496,9 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
if (Lex.getCode() != tgtok::comma) break;
Lex.Lex(); // eat ','.
+ if (Lex.getCode() != tgtok::Id)
+ return TokError("expected identifier");
+
SubClassLoc = Lex.getLoc();
// A defm can inherit from regular classes (non-multiclass) as
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index e17052b..9c2c69a 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -21,8 +21,11 @@ include "llvm/Target/Target.td"
// AArch64 Subtarget features.
//
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
+ "Enable ARMv8 FP">;
+
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
- "Enable Advanced SIMD instructions">;
+ "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
"Enable cryptographic instructions">;
@@ -33,7 +36,7 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
include "AArch64Schedule.td"
-def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
+def : Processor<"generic", GenericItineraries, [FeatureFPARMv8]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 9498722..d59ca56 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -32,17 +32,18 @@ using namespace llvm;
/// argument to be printed as "bN".
static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
const TargetRegisterInfo *TRI,
- const TargetRegisterClass &RegClass,
- raw_ostream &O) {
+ char RegType, raw_ostream &O) {
if (!MO.isReg())
return true;
for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
- if (RegClass.contains(*AR)) {
- O << AArch64InstPrinter::getRegisterName(*AR);
+ if (AArch64::FPR8RegClass.contains(*AR)) {
+ O << RegType << TRI->getEncodingValue(MO.getReg());
return false;
}
}
+
+ // The register doesn't correspond to anything floating-point like.
return true;
}
@@ -81,9 +82,9 @@ bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
StringRef Modifier;
switch (MO.getType()) {
default:
- llvm_unreachable("Unexpected operand for symbolic address constraint");
+ return true;
case MachineOperand::MO_GlobalAddress:
- Name = Mang->getSymbol(MO.getGlobal())->getName();
+ Name = getSymbol(MO.getGlobal())->getName();
// Global variables may be accessed either via a GOT or in various fun and
// interesting TLS-model specific ways. Set the prefix modifier as
@@ -145,57 +146,29 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
- if (!ExtraCode || !ExtraCode[0]) {
- // There's actually no operand modifier, which leads to a slightly eclectic
- // set of behaviour which we have to handle here.
- const MachineOperand &MO = MI->getOperand(OpNum);
- switch (MO.getType()) {
- default:
- llvm_unreachable("Unexpected operand for inline assembly");
- case MachineOperand::MO_Register:
- // GCC prints the unmodified operand of a 'w' constraint as the vector
- // register. Technically, we could allocate the argument as a VPR128, but
- // that leads to extremely dodgy copies being generated to get the data
- // there.
- if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
- O << AArch64InstPrinter::getRegisterName(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- O << '#' << MO.getImm();
- break;
- case MachineOperand::MO_FPImmediate:
- assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
- O << "#0.0";
- break;
- case MachineOperand::MO_BlockAddress:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- return printSymbolicAddress(MO, false, "", O);
- }
- return false;
- }
- // We have a real modifier to handle.
+ if (!ExtraCode)
+ ExtraCode = "";
+
switch(ExtraCode[0]) {
default:
- // See if this is a generic operand
- return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
- case 'c': // Don't print "#" before an immediate operand.
- if (!MI->getOperand(OpNum).isImm())
- return true;
- O << MI->getOperand(OpNum).getImm();
- return false;
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
+ return false;
+ break;
case 'w':
// Output 32-bit general register operand, constant zero as wzr, or stack
// pointer as wsp. Ignored when used with other operand types.
- return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::GPR32RegClass, O);
+ if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR32RegClass, O))
+ return false;
+ break;
case 'x':
// Output 64-bit general register operand, constant zero as xzr, or stack
// pointer as sp. Ignored when used with other operand types.
- return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::GPR64RegClass, O);
+ if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR64RegClass, O))
+ return false;
+ break;
case 'H':
// Output higher numbered of a 64-bit general register pair
case 'Q':
@@ -211,40 +184,65 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// copies ...).
llvm_unreachable("FIXME: Unimplemented register pairs");
case 'b':
- // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR8RegClass, O);
case 'h':
- // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR16RegClass, O);
case 's':
- // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR32RegClass, O);
case 'd':
- // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR64RegClass, O);
case 'q':
- // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR128RegClass, O);
+ if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ ExtraCode[0], O))
+ return false;
+ break;
case 'A':
// Output symbolic address with appropriate relocation modifier (also
// suitable for ADRP).
- return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
+ if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O))
+ return false;
+ break;
case 'L':
// Output bits 11:0 of symbolic address with appropriate :lo12: relocation
// modifier.
- return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
+ if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O))
+ return false;
+ break;
case 'G':
// Output bits 23:12 of symbolic address with appropriate :hi12: relocation
// modifier (currently only for TLS local exec).
- return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
+ if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O))
+ return false;
+ break;
+ case 'a':
+ return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
}
+ // There's actually no operand modifier, which leads to a slightly eclectic
+ // set of behaviour which we have to handle here.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for inline assembly");
+ case MachineOperand::MO_Register:
+ // GCC prints the unmodified operand of a 'w' constraint as the vector
+ // register. Technically, we could allocate the argument as a VPR128, but
+ // that leads to extremely dodgy copies being generated to get the data
+ // there.
+ if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
+ O << AArch64InstPrinter::getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << '#' << MO.getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
+ O << "#0.0";
+ break;
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return printSymbolicAddress(MO, false, "", O);
+ }
+ return false;
}
bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
index bff7eeb..a2a9f3f 100644
--- a/lib/Target/AArch64/AArch64CallingConv.td
+++ b/lib/Target/AArch64/AArch64CallingConv.td
@@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[
// Canonicalise the various types that live in different floating-point
// registers. This makes sense because the PCS does not distinguish Short
// Vectors and Floating-point types.
- CCIfType<[v2i8], CCBitConvertToType<f16>>,
- CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
- CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,
+ CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
+ CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType<f32>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCBitConvertToType<f128>>,
@@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[
// argument is allocated to the least significant bits of register
// v[NSRN]. The NSRN is incremented by one. The argument has now been
// allocated."
- CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ee819e0..ef99541 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -109,6 +109,45 @@ public:
SDNode* Select(SDNode*);
private:
+ /// Get the opcode for the table lookup instruction.
+ unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
+
+ /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
+ /// IsExt indicates whether the result will be extended with an argument.
+ SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
+
+ /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcode);
+
+ /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes);
+
+ /// Form sequences of consecutive 64/128-bit registers for use in NEON
+ /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
+ /// between 1 and 4 elements. If it contains a single element, that element is
+ /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
+ SDValue createDTuple(ArrayRef<SDValue> Vecs);
+ SDValue createQTuple(ArrayRef<SDValue> Vecs);
+
+ /// Generic helper for the createDTuple/createQTuple
+ /// functions. Those should almost always be called instead.
+ SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
+ unsigned SubRegs[]);
+
+ /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
+ /// The opcode array specifies the instructions used for load.
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes);
+
+ /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4.
+ /// The opcode arrays specify the instructions used for load/store.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
+ unsigned NumVecs, const uint16_t *Opcodes);
+
+ SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
+ SDValue Operand);
};
}
@@ -390,12 +429,607 @@ SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
&Ops[0], Ops.size());
}
+SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
+ static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
+ AArch64::DTripleRegClassID,
+ AArch64::DQuadRegClassID };
+ static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
+ AArch64::dsub_2, AArch64::dsub_3 };
+
+ return createTuple(Regs, RegClassIDs, SubRegs);
+}
+
+SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
+ static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
+ AArch64::QTripleRegClassID,
+ AArch64::QQuadRegClassID };
+ static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
+ AArch64::qsub_2, AArch64::qsub_3 };
+
+ return createTuple(Regs, RegClassIDs, SubRegs);
+}
+
+SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
+ unsigned RegClassIDs[],
+ unsigned SubRegs[]) {
+ // There's no special register-class for a vector-list of 1 element: it's just
+ // a vector.
+ if (Regs.size() == 1)
+ return Regs[0];
+
+ assert(Regs.size() >= 2 && Regs.size() <= 4);
+
+ SDLoc DL(Regs[0].getNode());
+
+ SmallVector<SDValue, 4> Ops;
+
+ // First operand of REG_SEQUENCE is the desired RegClass.
+ Ops.push_back(
+ CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
+
+ // Then we get pairs of source & subregister-position for the components.
+ for (unsigned i = 0; i < Regs.size(); ++i) {
+ Ops.push_back(Regs[i]);
+ Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
+ }
+
+ SDNode *N =
+ CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
+ return SDValue(N, 0);
+}
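For example, calling createDTuple with two 64-bit vector values produces a single REG_SEQUENCE node whose operands are the DPairRegClassID constant followed by each value paired with dsub_0 and dsub_1, which is the untyped register-tuple operand the ldN/tbl selection code below expects.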
+
+
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
+ case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
+ case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
+ case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
+ case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
+ case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
+ case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
+ case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
+
+ case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
+ case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
+ case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
+ case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
+ case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
+ case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
+ case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
+
+ case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
+ case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
+ case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
+ case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
+ case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
+ case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
+ case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
+
+ case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
+ case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
+ case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
+ case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
+ case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
+ case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
+ case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
+
+ case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register;
+ case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register;
+ case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register;
+ case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register;
+ case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register;
+ case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register;
+ case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register;
+ case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register;
+
+ case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register;
+ case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register;
+ case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register;
+ case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register;
+ case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register;
+ case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register;
+ case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register;
+ case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register;
+
+ case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register;
+ case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register;
+ case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register;
+ case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register;
+ case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register;
+ case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register;
+ case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register;
+ case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register;
+
+ case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
+ case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
+ case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
+ case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
+ case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
+ case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
+ case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
+ case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
+
+ case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
+ case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
+ case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
+ case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
+ case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
+ case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
+ case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
+
+ case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
+ case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
+ case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
+ case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
+ case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
+ case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
+ case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
+
+ case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
+ case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
+ case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
+ case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
+ case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
+ case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
+ case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
+
+ case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register;
+ case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register;
+ case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register;
+ case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register;
+ case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register;
+ case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register;
+ case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register;
+ case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register;
+
+ case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register;
+ case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register;
+ case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register;
+ case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register;
+ case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register;
+ case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register;
+ case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register;
+ case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register;
+
+ case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register;
+ case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register;
+ case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register;
+ case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register;
+ case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register;
+ case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
+ case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
+ case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;
+
+ // Post-indexed duplicate loads
+ case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
+ case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
+ case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
+ case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
+ case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
+ case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
+ case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
+ case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;
+
+ case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
+ case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
+ case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
+ case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
+ case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
+ case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
+ case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
+ case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;
+
+ case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
+ case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
+ case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
+ case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
+ case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
+ case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
+ case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
+ case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;
+
+ // Post-indexed lane loads
+ case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
+ case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
+ case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
+ case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;
+
+ case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
+ case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
+ case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
+ case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;
+
+ case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
+ case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
+ case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
+ case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;
+
+ // Post-indexed lane stores
+ case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
+ case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
+ case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
+ case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;
+
+ case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
+ case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
+ case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
+ case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;
+
+ case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
+ case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
+ case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
+ case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
+ }
+ return Opc; // If not one we handle, return it unchanged.
+}
+
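+// Illustrative note (not part of the original patch): the Opcodes arrays
+// handed to SelectVLD/SelectVST/SelectVLDDup are laid out with the four
+// 64-bit-vector forms first (8B, 4H, 2S, 1D) followed by the four
+// 128-bit-vector forms (16B, 8H, 4S, 2D), which is exactly the indexing
+// computed by the switch on getScalarType().getSizeInBits() below.
+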
+SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+
+ EVT VT = N->getValueType(0);
+ unsigned OpcodeIndex;
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
+ default: llvm_unreachable("unhandled vector load type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SmallVector<SDValue, 2> Ops;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+
+ Ops.push_back(N->getOperand(0)); // Push back the Chain
+
+ SmallVector<EVT, 3> ResTys;
+ // Push back the type of return super register
+ if (NumVecs == 1)
+ ResTys.push_back(VT);
+ else if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
+ ResTys.push_back(MVT::Other); // Type of the Chain
+ SDLoc dl(N);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // If NumVecs > 1, the return result is a super register containing 2-4
+ // consecutive vector registers.
+ SDValue SuperReg = SDValue(VLd, 0);
+
+ unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ // Update users of the Chain
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+
+ return NULL;
+}
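+
+// Illustrative note (not part of the original patch): for a multi-vector
+// load such as vld2 of 2 x v8i8, the machine node above produces one wide
+// result; each original result i is rewritten to
+//   EXTRACT_SUBREG(SuperReg, dsub_0 + i)   // qsub_0 + i for 128-bit vectors
+// and the chain (plus the write-back result for post-indexed forms) is
+// rewired to the new node's remaining results.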
+
+SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ SDLoc dl(N);
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3;
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ unsigned OpcodeIndex;
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
+ default: llvm_unreachable("unhandled vector store type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SmallVector<EVT, 2> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i64);
+ ResTys.push_back(MVT::Other); // Type for the Chain
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+
+ SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+ N->op_begin() + Vec0Idx + NumVecs);
+ SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
+ Ops.push_back(SrcReg);
+
+ // Push back the Chain
+ Ops.push_back(N->getOperand(0));
+
+ // Transfer memoperands.
+ SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
+}
+
+SDValue
+AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
+ SDValue Operand) {
+ SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
+ VT, VTD, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ Operand,
+ CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
+ return SDValue(Reg, 0);
+}
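+
+// Illustrative note (not part of the original patch): getTargetSubregToReg
+// widens a 64-bit vector into the low half of a 128-bit register via
+//   SUBREG_TO_REG 0, Operand, sub_64
+// which SelectVLDSTLane relies on because the lane load/store patterns are
+// only defined on 128-bit register tuples.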
+
+SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
+ SDLoc dl(N);
+
+ EVT VT = N->getValueType(0);
+ unsigned OpcodeIndex;
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
+ default: llvm_unreachable("unhandled vector duplicate lane load type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SDValue SuperReg;
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(N->getOperand(1)); // Push back the Memory Address
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+ Ops.push_back(N->getOperand(0)); // Push back the Chain
+
+ SmallVector<EVT, 3> ResTys;
+ // Push back the type of return super register
+ if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
+ ResTys.push_back(MVT::Other); // Type of the Chain
+ SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+
+ SuperReg = SDValue(VLdDup, 0);
+ unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
+ // Update uses of each register in the super register
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ // Update uses of the Chain
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return NULL;
+}
+
+// Load/store lane instructions only exist for 128-bit vector types. If the
+// operand is a 64-bit vector we still select the 128-bit instruction:
+// SUBREG_TO_REG widens the input to a 128-bit vector, and EXTRACT_SUBREG
+// recovers the 64-bit vector from the 128-bit result.
+SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ SDLoc dl(N);
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3;
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ EVT VT64; // 64-bit Vector Type
+
+ if (is64BitVector) {
+ VT64 = VT;
+ VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ }
+
+ unsigned OpcodeIndex;
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = 0; break;
+ case 16: OpcodeIndex = 1; break;
+ case 32: OpcodeIndex = 2; break;
+ case 64: OpcodeIndex = 3; break;
+ default: llvm_unreachable("unhandled vector lane load/store type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SmallVector<EVT, 3> ResTys;
+ if (IsLoad) {
+ // Push back the type of return super register
+ if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
+ ResTys.push_back(MVT::Other); // Type of Chain
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+
+ SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+ N->op_begin() + Vec0Idx + NumVecs);
+ if (is64BitVector)
+ for (unsigned i = 0; i < Regs.size(); i++)
+ Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
+ SDValue SuperReg = createQTuple(Regs);
+
+ Ops.push_back(SuperReg); // Source Reg
+ SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
+ Ops.push_back(LaneValue);
+ Ops.push_back(Chain); // Push back the Chain
+
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ unsigned Sub0 = AArch64::qsub_0;
+ // Update uses of each register in the super register
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
+ if (is64BitVector) {
+ SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
+ }
+ ReplaceUses(SDValue(N, Vec), SUB0);
+ }
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
+
+unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
+ unsigned NumOfVec) {
+ assert(NumOfVec >= 1 && NumOfVec <= 4 && "TBL/TBX NumOfVec out-of-range");
+
+ unsigned Opc = 0;
+ switch (NumOfVec) {
+ default:
+ break;
+ case 1:
+ if (IsExt)
+ Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
+ else
+ Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
+ break;
+ case 2:
+ if (IsExt)
+ Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
+ else
+ Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
+ break;
+ case 3:
+ if (IsExt)
+ Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
+ else
+ Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
+ break;
+ case 4:
+ if (IsExt)
+ Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
+ else
+ Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
+ break;
+ }
+
+ return Opc;
+}
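+
+// Illustrative note (not part of the original patch): TBL writes zero to
+// result elements whose index is out of range, while TBX leaves those
+// destination elements unchanged; the _8b/_16b suffix is chosen from the
+// width of the result vector, whereas the table registers themselves are
+// always 128-bit (see the assert in SelectVTBL below).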
+
+SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
+ bool IsExt) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ SDLoc dl(N);
+
+ // The elements of the lookup table for vtbl/vtbx must be 128-bit vectors
+ unsigned Vec0Idx = IsExt ? 2 : 1;
+ assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
+ "The element of lookup table for vtbl and vtbx must be 128-bit");
+
+ // Check whether the result type is a 64-bit vector
+ EVT ResVT = N->getValueType(0);
+ bool is64BitRes = ResVT.is64BitVector();
+
+ // Create new SDValue for vector list
+ SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+ N->op_begin() + Vec0Idx + NumVecs);
+ SDValue TblReg = createQTuple(Regs);
+ unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
+
+ SmallVector<SDValue, 3> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(TblReg);
+ Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
+ return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
+}
+
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
if (Node->isMachineOpcode()) {
DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ Node->setNodeId(-1);
return NULL;
}
@@ -535,6 +1169,399 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
Node = ResNode;
break;
}
+ case AArch64ISD::NEON_LD1_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed,
+ AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed,
+ AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
+ AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 1, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed,
+ AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
+ AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
+ AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed,
+ AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
+ AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
+ AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed,
+ AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
+ AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
+ AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD1x2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed,
+ AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
+ AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed,
+ AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD1x3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed,
+ AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
+ AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed,
+ AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD1x4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed,
+ AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
+ AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed,
+ AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed,
+ AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed,
+ AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
+ AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
+ };
+ return SelectVST(Node, true, 1, Opcodes);
+ }
+ case AArch64ISD::NEON_ST2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed,
+ AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
+ AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
+ AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
+ };
+ return SelectVST(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_ST3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed,
+ AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
+ AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
+ AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
+ };
+ return SelectVST(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_ST4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed,
+ AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
+ AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
+ AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
+ };
+ return SelectVST(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S,
+ AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H,
+ AArch64::LD2R_4S, AArch64::LD2R_2D
+ };
+ return SelectVLDDup(Node, false, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S,
+ AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H,
+ AArch64::LD3R_4S, AArch64::LD3R_2D
+ };
+ return SelectVLDDup(Node, false, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S,
+ AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H,
+ AArch64::LD4R_4S, AArch64::LD4R_2D
+ };
+ return SelectVLDDup(Node, false, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed,
+ AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed,
+ AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed,
+ AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed,
+ AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed,
+ AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed,
+ AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed,
+ AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed,
+ AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed,
+ AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed,
+ AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed,
+ AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed,
+ AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_ST2LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
+ AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_ST3LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
+ AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_ST4LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
+ AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1x2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed,
+ AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
+ AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
+ AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed
+ };
+ return SelectVST(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1x3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed,
+ AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
+ AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
+ AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed
+ };
+ return SelectVST(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1x4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed,
+ AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
+ AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
+ AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed
+ };
+ return SelectVST(Node, true, 4, Opcodes);
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ bool IsExt = false;
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_vtbx1:
+ IsExt = true;
+ case Intrinsic::aarch64_neon_vtbl1:
+ return SelectVTBL(Node, 1, IsExt);
+ case Intrinsic::aarch64_neon_vtbx2:
+ IsExt = true;
+ case Intrinsic::aarch64_neon_vtbl2:
+ return SelectVTBL(Node, 2, IsExt);
+ case Intrinsic::aarch64_neon_vtbx3:
+ IsExt = true;
+ case Intrinsic::aarch64_neon_vtbl3:
+ return SelectVTBL(Node, 3, IsExt);
+ case Intrinsic::aarch64_neon_vtbx4:
+ IsExt = true;
+ case Intrinsic::aarch64_neon_vtbl4:
+ return SelectVTBL(Node, 4, IsExt);
+ }
+ break;
+ }
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::arm_neon_vld1: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
+ AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
+ };
+ return SelectVLD(Node, false, 1, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld2: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
+ AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
+ };
+ return SelectVLD(Node, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld3: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
+ AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
+ };
+ return SelectVLD(Node, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld4: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
+ AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
+ };
+ return SelectVLD(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vld1x2: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
+ AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
+ AArch64::LD1x2_4S, AArch64::LD1x2_2D
+ };
+ return SelectVLD(Node, false, 2, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vld1x3: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
+ AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
+ AArch64::LD1x3_4S, AArch64::LD1x3_2D
+ };
+ return SelectVLD(Node, false, 3, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vld1x4: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
+ AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
+ AArch64::LD1x4_4S, AArch64::LD1x4_2D
+ };
+ return SelectVLD(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst1: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
+ AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
+ };
+ return SelectVST(Node, false, 1, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst2: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
+ AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
+ };
+ return SelectVST(Node, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst3: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
+ AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
+ };
+ return SelectVST(Node, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst4: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
+ AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
+ };
+ return SelectVST(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vst1x2: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
+ AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
+ AArch64::ST1x2_4S, AArch64::ST1x2_2D
+ };
+ return SelectVST(Node, false, 2, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vst1x3: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
+ AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
+ AArch64::ST1x3_4S, AArch64::ST1x3_2D
+ };
+ return SelectVST(Node, false, 3, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vst1x4: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S,
+ AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
+ AArch64::ST1x4_4S, AArch64::ST1x4_2D
+ };
+ return SelectVST(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld2lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D
+ };
+ return SelectVLDSTLane(Node, true, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld3lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D
+ };
+ return SelectVLDSTLane(Node, true, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld4lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D
+ };
+ return SelectVLDSTLane(Node, true, false, 4, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst2lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst3lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst4lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 4, Opcodes);
+ }
+ } // End of switch IntNo
+ break;
+ } // End of case ISD::INTRINSIC_VOID and ISD::INTRINSIC_W_CHAIN
default:
break; // Let generic code handle it
}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 44b691b..4fdb667 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -50,24 +50,33 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
// Scalar register <-> type mapping
addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
- addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
- addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
- addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+
+ if (Subtarget->hasFPARMv8()) {
+ addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+ }
if (Subtarget->hasNEON()) {
// And the vectors
- addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
+ addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
}
computeRegisterProperties();
@@ -77,6 +86,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SHL);
+
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
// AArch64 does not have i1 loads, or much of anything for i1 really.
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -270,28 +285,89 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setExceptionSelectorRegister(AArch64::X1);
if (Subtarget->hasNEON()) {
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
+
setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1i64, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1f32, Custom);
setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v1f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v1f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
}
}
@@ -333,6 +409,29 @@ static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
StrOpc = StoreOps[Log2_32(Size)];
}
+// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass have no value
+// type mapped to them; both are defined as MVT::Untyped. Without knowing the
+// MVT, MachineLICM::getRegisterClassIDAndCost cannot compute the register
+// pressure correctly.
+std::pair<const TargetRegisterClass*, uint8_t>
+AArch64TargetLowering::findRepresentativeClass(MVT VT) const {
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::v4i64:
+ RRC = &AArch64::QPairRegClass;
+ Cost = 2;
+ break;
+ case MVT::v8i64:
+ RRC = &AArch64::QQuadRegClass;
+ Cost = 4;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
MachineBasicBlock *
AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size,
@@ -658,6 +757,12 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
+ if (!NZCVKilled) {
+ // NZCV is live-through TrueBB.
+ TrueBB->addLiveIn(AArch64::NZCV);
+ EndBB->addLiveIn(AArch64::NZCV);
+ }
+
// IfTrue:
// str qIFTRUE, [sp]
BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
@@ -672,8 +777,6 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
// Done:
// ldr qDEST, [sp]
// [... rest of incoming MBB ...]
- if (!NZCVKilled)
- EndBB->addLiveIn(AArch64::NZCV);
MachineInstr *StartOfEnd = EndBB->begin();
BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
.addFrameIndex(ScratchFI)
@@ -833,6 +936,86 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
return "AArch64ISD::NEON_CMPZ";
case AArch64ISD::NEON_TST:
return "AArch64ISD::NEON_TST";
+ case AArch64ISD::NEON_QSHLs:
+ return "AArch64ISD::NEON_QSHLs";
+ case AArch64ISD::NEON_QSHLu:
+ return "AArch64ISD::NEON_QSHLu";
+ case AArch64ISD::NEON_VDUP:
+ return "AArch64ISD::NEON_VDUP";
+ case AArch64ISD::NEON_VDUPLANE:
+ return "AArch64ISD::NEON_VDUPLANE";
+ case AArch64ISD::NEON_REV16:
+ return "AArch64ISD::NEON_REV16";
+ case AArch64ISD::NEON_REV32:
+ return "AArch64ISD::NEON_REV32";
+ case AArch64ISD::NEON_REV64:
+ return "AArch64ISD::NEON_REV64";
+ case AArch64ISD::NEON_UZP1:
+ return "AArch64ISD::NEON_UZP1";
+ case AArch64ISD::NEON_UZP2:
+ return "AArch64ISD::NEON_UZP2";
+ case AArch64ISD::NEON_ZIP1:
+ return "AArch64ISD::NEON_ZIP1";
+ case AArch64ISD::NEON_ZIP2:
+ return "AArch64ISD::NEON_ZIP2";
+ case AArch64ISD::NEON_TRN1:
+ return "AArch64ISD::NEON_TRN1";
+ case AArch64ISD::NEON_TRN2:
+ return "AArch64ISD::NEON_TRN2";
+ case AArch64ISD::NEON_LD1_UPD:
+ return "AArch64ISD::NEON_LD1_UPD";
+ case AArch64ISD::NEON_LD2_UPD:
+ return "AArch64ISD::NEON_LD2_UPD";
+ case AArch64ISD::NEON_LD3_UPD:
+ return "AArch64ISD::NEON_LD3_UPD";
+ case AArch64ISD::NEON_LD4_UPD:
+ return "AArch64ISD::NEON_LD4_UPD";
+ case AArch64ISD::NEON_ST1_UPD:
+ return "AArch64ISD::NEON_ST1_UPD";
+ case AArch64ISD::NEON_ST2_UPD:
+ return "AArch64ISD::NEON_ST2_UPD";
+ case AArch64ISD::NEON_ST3_UPD:
+ return "AArch64ISD::NEON_ST3_UPD";
+ case AArch64ISD::NEON_ST4_UPD:
+ return "AArch64ISD::NEON_ST4_UPD";
+ case AArch64ISD::NEON_LD1x2_UPD:
+ return "AArch64ISD::NEON_LD1x2_UPD";
+ case AArch64ISD::NEON_LD1x3_UPD:
+ return "AArch64ISD::NEON_LD1x3_UPD";
+ case AArch64ISD::NEON_LD1x4_UPD:
+ return "AArch64ISD::NEON_LD1x4_UPD";
+ case AArch64ISD::NEON_ST1x2_UPD:
+ return "AArch64ISD::NEON_ST1x2_UPD";
+ case AArch64ISD::NEON_ST1x3_UPD:
+ return "AArch64ISD::NEON_ST1x3_UPD";
+ case AArch64ISD::NEON_ST1x4_UPD:
+ return "AArch64ISD::NEON_ST1x4_UPD";
+ case AArch64ISD::NEON_LD2DUP:
+ return "AArch64ISD::NEON_LD2DUP";
+ case AArch64ISD::NEON_LD3DUP:
+ return "AArch64ISD::NEON_LD3DUP";
+ case AArch64ISD::NEON_LD4DUP:
+ return "AArch64ISD::NEON_LD4DUP";
+ case AArch64ISD::NEON_LD2DUP_UPD:
+ return "AArch64ISD::NEON_LD2DUP_UPD";
+ case AArch64ISD::NEON_LD3DUP_UPD:
+ return "AArch64ISD::NEON_LD3DUP_UPD";
+ case AArch64ISD::NEON_LD4DUP_UPD:
+ return "AArch64ISD::NEON_LD4DUP_UPD";
+ case AArch64ISD::NEON_LD2LN_UPD:
+ return "AArch64ISD::NEON_LD2LN_UPD";
+ case AArch64ISD::NEON_LD3LN_UPD:
+ return "AArch64ISD::NEON_LD3LN_UPD";
+ case AArch64ISD::NEON_LD4LN_UPD:
+ return "AArch64ISD::NEON_LD4LN_UPD";
+ case AArch64ISD::NEON_ST2LN_UPD:
+ return "AArch64ISD::NEON_ST2LN_UPD";
+ case AArch64ISD::NEON_ST3LN_UPD:
+ return "AArch64ISD::NEON_ST3LN_UPD";
+ case AArch64ISD::NEON_ST4LN_UPD:
+ return "AArch64ISD::NEON_ST4LN_UPD";
+ case AArch64ISD::NEON_VEXTRACT:
+ return "AArch64ISD::NEON_VEXTRACT";
default:
return NULL;
}
@@ -908,24 +1091,31 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
}
}
+ if (getSubtarget()->hasFPARMv8()) {
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
- if (FPRSaveSize != 0) {
- FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
-
- SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
-
- for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
- unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
- &AArch64::FPR128RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
- SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 16),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
+ // According to the AArch64 Procedure Call Standard, section B.1/B.3, we
+ // can omit a register save area if we know we'll never use registers of
+ // that class.
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
+ &AArch64::FPR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 16),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
}
+ FuncInfo->setVariadicFPRIdx(FPRIdx);
+ FuncInfo->setVariadicFPRSize(FPRSaveSize);
}
int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
@@ -933,8 +1123,6 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
FuncInfo->setVariadicStackIdx(StackIdx);
FuncInfo->setVariadicGPRIdx(GPRIdx);
FuncInfo->setVariadicGPRSize(GPRSaveSize);
- FuncInfo->setVariadicFPRIdx(FPRIdx);
- FuncInfo->setVariadicFPRSize(FPRSaveSize);
if (!MemOps.empty()) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
@@ -1875,7 +2063,7 @@ AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op));
+ /*isSigned*/ false, SDLoc(Op)).first;
}
SDValue
@@ -1905,6 +2093,45 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
return LowerF128ToCall(Op, DAG, LC);
}
+SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
+                                               SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(8, MVT::i64);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return X30, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64);
+}
+
+
+SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG)
+ const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = AArch64::X29;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
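+
+// Illustrative note (not part of the original patch): together these lower
+// @llvm.returnaddress/@llvm.frameaddress. Depth 0 reads X30/X29 directly;
+// deeper requests walk the chain of saved frame pointers and load the
+// return address from [frame pointer, #8].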
+
SDValue
AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
SelectionDAG &DAG) const {
@@ -2650,6 +2877,8 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
@@ -2664,6 +2893,7 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
}
return SDValue();
@@ -3235,6 +3465,336 @@ static SDValue PerformSRACombine(SDNode *N,
DAG.getConstant(LSB + Width - 1, MVT::i64));
}
+/// Check if this is a valid build_vector for the immediate operand of
+/// a vector shift operation, where all the elements of the build_vector
+/// must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// Check if this is a valid build_vector for the immediate operand of
+/// a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits
+static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && Cnt < ElementBits);
+}
+
+/// Check if this is a valid build_vector for the immediate operand of a
+/// vector shift right operation. The value must be in the range:
+/// 1 <= Value <= ElementBits
+static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 1 && Cnt <= ElementBits);
+}
+
+/// Checks for immediate versions of vector shifts and lowers them.
+static SDValue PerformShiftCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *ST) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
+ return PerformSRACombine(N, DCI);
+
+ // Nothing to be done for scalar shifts.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ assert(ST->hasNEON() && "unexpected vector shift");
+ int64_t Cnt;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
+ SDValue RHS =
+ DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
+ DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
+ }
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
+ SDValue RHS =
+ DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
+ DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
+ }
+ break;
+ }
+
+ return SDValue();
+}
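+
+// Illustrative note (not part of the original patch): with the combine above,
+// a legal vector shift by a constant splat, e.g.
+//   shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
+// has its shift amount rewritten as a NEON_VDUP of the immediate, which the
+// instruction patterns can then match as a shift-by-immediate.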
+
+/// AArch64-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
+ break;
+ unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
+ ? AArch64ISD::NEON_QSHLs
+ : AArch64ISD::NEON_QSHLu;
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ return SDValue();
+}
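+
+// Illustrative note (not part of the original patch): the combine above only
+// fires when the saturating-shift intrinsic's shift amount is an in-range
+// constant splat; the intrinsic is then re-expressed as NEON_QSHLs/NEON_QSHLu
+// with an i32 immediate so it can be selected as a shift-by-immediate form.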
+
+/// Target-specific DAG combine function for NEON load/store intrinsics
+/// to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode for Neon base update");
+ case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD;
+ NumVecs = 2; break;
+ case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD;
+ NumVecs = 3; break;
+ case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD;
+ NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx + 1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
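+ // Illustrative example: a vld3 of v4i32 references 3 * 16 = 48 bytes,
+ // whereas a vld3lane of v4i32 only references 3 * 4 = 12 bytes.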
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint32_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ Inc = DAG.getTargetConstant(IncVal, MVT::i32);
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i64;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs + 2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
+
+/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1)
+/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs.
+/// If so, combine them to a vldN-dup operation and return the new node.
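+/// As an illustrative example, if every non-chain use of a vld2lane result is
+/// a NEON_VDUPLANE of the loaded lane, the load can be rewritten as a
+/// NEON_LD2DUP (LD2R), which loads one element per vector and replicates it
+/// to all lanes.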
+static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ // Check if the VDUPLANE operand is a vldN-lane intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return SDValue();
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = AArch64ISD::NEON_LD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = AArch64ISD::NEON_LD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = AArch64ISD::NEON_LD4DUP;
+ } else {
+ return SDValue();
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return SDValue();
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumVecs + 1);
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2,
+ VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return SDValue(N, 0);
+}
SDValue
AArch64TargetLowering::PerformDAGCombine(SDNode *N,
@@ -3243,7 +3803,45 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::AND: return PerformANDCombine(N, DCI);
case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
- case ISD::SRA: return PerformSRACombine(N, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return PerformShiftCombine(N, DCI, getSubtarget());
+ case ISD::INTRINSIC_WO_CHAIN:
+ return PerformIntrinsicCombine(N, DCI.DAG);
+ case AArch64ISD::NEON_VDUPLANE:
+ return CombineVLDDUP(N, DCI);
+ case AArch64ISD::NEON_LD2DUP:
+ case AArch64ISD::NEON_LD3DUP:
+ case AArch64ISD::NEON_LD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::aarch64_neon_vld1x2:
+ case Intrinsic::aarch64_neon_vld1x3:
+ case Intrinsic::aarch64_neon_vld1x4:
+ case Intrinsic::aarch64_neon_vst1x2:
+ case Intrinsic::aarch64_neon_vst1x3:
+ case Intrinsic::aarch64_neon_vst1x4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default:
+ break;
+ }
}
return SDValue();
}
@@ -3269,6 +3867,59 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+// Check whether a BUILD_VECTOR can be represented as a VECTOR_SHUFFLE of two
+// vectors. If so, lower it by calling LowerVECTOR_SHUFFLE.
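+// As an illustrative example, a v4i32 BUILD_VECTOR of
+//   extract(A, 0), extract(A, 2), extract(B, 1), extract(B, 3)
+// becomes a vector_shuffle<0, 2, 5, 7> of (A, B), which the shuffle lowering
+// below can then match against the NEON permute/extract patterns.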
+bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
+ SDValue &Res) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned V0NumElts = 0;
+ int Mask[16];
+ SDValue V0, V1;
+
+ // Check that every element is extracted from one of at most two vectors.
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ if (V0.getNode() == 0) {
+ V0 = Elt.getOperand(0);
+ V0NumElts = V0.getValueType().getVectorNumElements();
+ }
+ if (Elt.getOperand(0) == V0) {
+ Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue());
+ continue;
+ } else if (V1.getNode() == 0) {
+ V1 = Elt.getOperand(0);
+ }
+ if (Elt.getOperand(0) == V1) {
+ unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue();
+ Mask[i] = (Lane + V0NumElts);
+ continue;
+ } else {
+ return false;
+ }
+ }
+
+ if (!V1.getNode() && V0NumElts == NumElts * 2) {
+ V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
+ DAG.getConstant(NumElts, MVT::i64));
+ V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
+ DAG.getConstant(0, MVT::i64));
+ V0NumElts = V0.getValueType().getVectorNumElements();
+ }
+
+ if (V1.getNode() && NumElts == V0NumElts &&
+ V0NumElts == V1.getValueType().getVectorNumElements()) {
+ SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
+ Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
+ return true;
+ } else
+ return false;
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue
@@ -3283,12 +3934,15 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned SplatBitSize;
bool HasAnyUndefs;
+ bool UseNeonMov = VT.getSizeInBits() >= 64;
+
// Note we favor lowering MOVI over MVNI.
// This has implications on the definition of patterns in TableGen to select
// BIC immediate instructions but not ORR immediate instructions.
// If this lowering order is changed, TableGen patterns for BIC immediate and
// ORR immediate instructions have to be updated.
- if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (UseNeonMov &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
// First attempt to use vector immediate-form MOVI
EVT NeonMovVT;
@@ -3336,9 +3990,390 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
}
}
+
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool hasDominantValue = false;
+ bool isConstant = true;
+
+ // Map of the number of times a particular SDValue appears in the
+ // element list.
+ DenseMap<SDValue, unsigned> ValueCounts;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+
+ // Is this value dominant? (takes up more than half of the lanes)
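+ // E.g. (illustrative) in <a, a, a, b> the non-constant value 'a' fills
+ // three of the four lanes, so it is dominant and the vector is built below
+ // as a DUP of 'a' plus a single lane insert of 'b'.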
+ if (++Count > (NumElts / 2)) {
+ hasDominantValue = true;
+ Value = V;
+ }
+ }
+ if (ValueCounts.size() != 1)
+ usesOnlyOneValue = false;
+ if (!Value.getNode() && ValueCounts.size() > 0)
+ Value = ValueCounts.begin()->first;
+
+ if (ValueCounts.size() == 0)
+ return DAG.getUNDEF(VT);
+
+ // Loads are better lowered with insert_vector_elt, so keep going in that
+ // case; otherwise a single non-undef low element becomes SCALAR_TO_VECTOR.
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (hasDominantValue && EltSize <= 64) {
+ // Use VDUP for non-constant splats.
+ if (!isConstant) {
+ SDValue N;
+
+ // If we are DUPing a value that comes directly from a vector, we could
+ // just use DUPLANE. We can only do this if the lane being extracted
+ // is at a constant index, as the DUP from lane instructions only have
+ // constant-index forms.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Value->getOperand(1))) {
+ N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
+ Value->getOperand(0), Value->getOperand(1));
+ } else
+ N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
+
+ if (!usesOnlyOneValue) {
+ // The dominant value was splatted as 'N', but we now have to insert
+ // all differing elements.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (Op.getOperand(I) == Value)
+ continue;
+ SmallVector<SDValue, 3> Ops;
+ Ops.push_back(N);
+ Ops.push_back(Op.getOperand(I));
+ Ops.push_back(DAG.getConstant(I, MVT::i64));
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
+ }
+ }
+ return N;
+ }
+ if (usesOnlyOneValue && isConstant) {
+ return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
+ }
+ }
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Try to lower this as a VECTOR_SHUFFLE instead.
+ SDValue Shuf;
+ if (isKnownShuffleVector(Op, DAG, Shuf))
+ return Shuf;
+
+ // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
+ // know the default expansion would otherwise fall back on something even
+ // worse. For a vector with one or two non-undef values, that's
+ // scalar_to_vector for the elements followed by a shuffle (provided the
+ // shuffle is valid for the target) and materialization element by element
+ // on the stack followed by a load for everything else.
+ if (!isConstant && !usesOnlyOneValue) {
+ SDValue Vec = DAG.getUNDEF(VT);
+ for (unsigned i = 0 ; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
+ }
+ return Vec;
+ }
return SDValue();
}
+/// isREVMask - Check if a vector shuffle corresponds to a REV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
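+/// As an illustrative example, for v8i16 and BlockSize == 32 each 32-bit
+/// block holds two 16-bit elements, so the expected mask is
+/// <1, 0, 3, 2, 5, 4, 7, 6> (i.e. REV32 on a .8h arrangement).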
+static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+ "Only possible block sizes for REV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0)
+ continue; // ignore UNDEF indices
+ if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+// isPermuteMask - Check whether the vector shuffle mask matches a UZP, ZIP or
+// TRN instruction, returning the corresponding AArch64ISD opcode (0 if none).
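+// Illustrative masks for a v4i32 shuffle of (V1, V2), using the checks below:
+//   UZP1 <0, 2, 4, 6>   ZIP1 <0, 4, 1, 5>   TRN1 <0, 4, 2, 6>
+//   UZP2 <1, 3, 5, 7>   ZIP2 <2, 6, 3, 7>   TRN2 <1, 5, 3, 7>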
+static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts < 4)
+ return 0;
+
+ bool ismatch = true;
+
+ // Check UZP1
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != i * 2) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_UZP1;
+
+ // Check UZP2
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != i * 2 + 1) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_UZP2;
+
+ // Check ZIP1
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != i / 2 + NumElts * (i % 2)) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_ZIP1;
+
+ // Check ZIP2
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != (NumElts + i) / 2 + NumElts * (i % 2)) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_ZIP2;
+
+ // Check TRN1
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != i + (NumElts - 1) * (i % 2)) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_TRN1;
+
+ // Check TRN2
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != 1 + i + (NumElts - 1) * (i % 2)) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_TRN2;
+
+ return 0;
+}
+
+SDValue
+AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize > 64)
+ return SDValue();
+
+ if (isREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
+ if (isREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
+ if (isREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
+
+ unsigned ISDNo = isPermuteMask(ShuffleMask, VT);
+ if (ISDNo)
+ return DAG.getNode(ISDNo, dl, VT, V1, V2);
+
+ // If all elements of the shuffle mask are the same constant, we can
+ // transform this into either NEON_VDUP or NEON_VDUPLANE.
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
+ }
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
+ if (V1.getOpcode() == ISD::BUILD_VECTOR) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
+ i != (unsigned)Lane) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
+ V1.getOperand(Lane));
+ }
+
+ // Test if V1 is an EXTRACT_SUBVECTOR.
+ if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
+ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
+ DAG.getConstant(Lane + ExtLane, MVT::i64));
+ }
+ // Test if V1 is a CONCAT_VECTORS.
+ if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+ V1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ SDValue Op0 = V1.getOperand(0);
+ assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
+ "Invalid vector lane access");
+ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
+ DAG.getConstant(Lane, MVT::i64));
+ }
+
+ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i64));
+ }
+
+ int Length = ShuffleMask.size();
+ int V1EltNum = V1.getValueType().getVectorNumElements();
+
+ // If the number of V1 elements is the same as the number of shuffle mask
+ // elements and the mask values are sequential, we can transform this into
+ // NEON_VEXTRACT.
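+ // Illustrative example: for two v8i8 operands, the mask
+ // <3, 4, 5, 6, 7, 8, 9, 10> is sequential starting at 3, so it lowers to
+ // NEON_VEXTRACT (EXT) with a byte index of 3.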
+ if (V1EltNum == Length) {
+ // Check if the shuffle mask is sequential.
+ bool IsSequential = true;
+ int CurMask = ShuffleMask[0];
+ for (int I = 0; I < Length; ++I) {
+ if (ShuffleMask[I] != CurMask) {
+ IsSequential = false;
+ break;
+ }
+ CurMask++;
+ }
+ if (IsSequential) {
+ assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
+ unsigned VecSize = EltSize * V1EltNum;
+ unsigned Index = (EltSize/8) * ShuffleMask[0];
+ if (VecSize == 64 || VecSize == 128)
+ return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
+ DAG.getConstant(Index, MVT::i64));
+ }
+ }
+
+ // For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
+ // insert by element from V2 into V1.
+ // If the shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 is a
+ // better choice to insert into than V1, since fewer inserts are needed;
+ // so we count the elements that need inserting for both V1 and V2 and
+ // select the one that needs fewer as the insertion target.
+
+ // Collect the elements that need to be inserted and their indices.
+ SmallVector<int, 8> NV1Elt;
+ SmallVector<int, 8> N1Index;
+ SmallVector<int, 8> NV2Elt;
+ SmallVector<int, 8> N2Index;
+ for (int I = 0; I != Length; ++I) {
+ if (ShuffleMask[I] != I) {
+ NV1Elt.push_back(ShuffleMask[I]);
+ N1Index.push_back(I);
+ }
+ }
+ for (int I = 0; I != Length; ++I) {
+ if (ShuffleMask[I] != (I + V1EltNum)) {
+ NV2Elt.push_back(ShuffleMask[I]);
+ N2Index.push_back(I);
+ }
+ }
+
+ // Decide which vector to insert into. If all lanes mismatch for both, the
+ // insertion base is UNDEF rather than V1 or V2.
+ SDValue InsV = V1;
+ SmallVector<int, 8> InsMasks = NV1Elt;
+ SmallVector<int, 8> InsIndex = N1Index;
+ if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
+ if (NV1Elt.size() > NV2Elt.size()) {
+ InsV = V2;
+ InsMasks = NV2Elt;
+ InsIndex = N2Index;
+ }
+ } else {
+ InsV = DAG.getNode(ISD::UNDEF, dl, VT);
+ }
+
+ for (int I = 0, E = InsMasks.size(); I != E; ++I) {
+ SDValue ExtV = V1;
+ int Mask = InsMasks[I];
+ if (Mask >= V1EltNum) {
+ ExtV = V2;
+ Mask -= V1EltNum;
+ }
+ // Any value type smaller than i32 is illegal in AArch64, and this
+ // lowering function runs after the legalize pass, so we need to
+ // legalize the result here.
+ EVT EltVT;
+ if (VT.getVectorElementType().isFloatingPoint())
+ EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
+ else
+ EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
+
+ if (Mask >= 0) {
+ ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
+ DAG.getConstant(Mask, MVT::i64));
+ InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
+ DAG.getConstant(InsIndex[I], MVT::i64));
+ }
+ }
+ return InsV;
+}
+
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
@@ -3484,14 +4519,10 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &AArch64::FPR16RegClass);
else if (VT == MVT::f32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
- else if (VT == MVT::f64)
- return std::make_pair(0U, &AArch64::FPR64RegClass);
else if (VT.getSizeInBits() == 64)
- return std::make_pair(0U, &AArch64::VPR64RegClass);
- else if (VT == MVT::f128)
- return std::make_pair(0U, &AArch64::FPR128RegClass);
+ return std::make_pair(0U, &AArch64::FPR64RegClass);
else if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, &AArch64::VPR128RegClass);
+ return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
}
}
@@ -3500,3 +4531,69 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// constraint into a member of a register class.
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
+
+/// Represent NEON load and store intrinsics as MemIntrinsicNodes.
+/// The associated MachineMemOperands record the alignment specified
+/// in the intrinsic calls.
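+/// As an illustrative example, a vld2 returning two v4i32 vectors transfers
+/// 32 bytes, so memVT below is conservatively recorded as a vector of
+/// 4 x i64 (NumElts = 32 / 8 = 4).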
+bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::aarch64_neon_vld1x2:
+ case Intrinsic::aarch64_neon_vld1x3:
+ case Intrinsic::aarch64_neon_vld1x4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::aarch64_neon_vst1x2:
+ case Intrinsic::aarch64_neon_vst1x3:
+ case Intrinsic::aarch64_neon_vst1x4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 67a908e..8ad5a79 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -19,7 +19,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-
+#include "llvm/IR/Intrinsics.h"
namespace llvm {
namespace AArch64ISD {
@@ -125,6 +125,19 @@ namespace AArch64ISD {
// Vector FP move immediate
NEON_FMOVIMM,
+ // Vector permute
+ NEON_UZP1,
+ NEON_UZP2,
+ NEON_ZIP1,
+ NEON_ZIP2,
+ NEON_TRN1,
+ NEON_TRN2,
+
+ // Vector element reverse
+ NEON_REV64,
+ NEON_REV32,
+ NEON_REV16,
+
// Vector compare
NEON_CMP,
@@ -132,7 +145,58 @@ namespace AArch64ISD {
NEON_CMPZ,
// Vector compare bitwise test
- NEON_TST
+ NEON_TST,
+
+ // Vector saturating shift
+ NEON_QSHLs,
+ NEON_QSHLu,
+
+ // Vector dup
+ NEON_VDUP,
+
+ // Vector dup by lane
+ NEON_VDUPLANE,
+
+ // Vector extract
+ NEON_VEXTRACT,
+
+ // NEON duplicate lane loads
+ NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ NEON_LD3DUP,
+ NEON_LD4DUP,
+
+ // NEON loads with post-increment base updates:
+ NEON_LD1_UPD,
+ NEON_LD2_UPD,
+ NEON_LD3_UPD,
+ NEON_LD4_UPD,
+ NEON_LD1x2_UPD,
+ NEON_LD1x3_UPD,
+ NEON_LD1x4_UPD,
+
+ // NEON stores with post-increment base updates:
+ NEON_ST1_UPD,
+ NEON_ST2_UPD,
+ NEON_ST3_UPD,
+ NEON_ST4_UPD,
+ NEON_ST1x2_UPD,
+ NEON_ST1x3_UPD,
+ NEON_ST1x4_UPD,
+
+ // NEON duplicate lane loads with post-increment base updates:
+ NEON_LD2DUP_UPD,
+ NEON_LD3DUP_UPD,
+ NEON_LD4DUP_UPD,
+
+ // NEON lane loads with post-increment base updates:
+ NEON_LD2LN_UPD,
+ NEON_LD3LN_UPD,
+ NEON_LD4LN_UPD,
+
+ // NEON lane stores with post-increment base updates:
+ NEON_ST2LN_UPD,
+ NEON_ST3LN_UPD,
+ NEON_ST4LN_UPD
};
}
@@ -169,9 +233,13 @@ public:
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;
+
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+
void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
SDValue &Chain) const;
@@ -234,6 +302,8 @@ public:
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
@@ -269,6 +339,14 @@ public:
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ unsigned Intrinsic) const LLVM_OVERRIDE;
+
+protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(MVT VT) const;
+
private:
const InstrItineraryData *Itins;
@@ -280,6 +358,10 @@ enum NeonModImmType {
Neon_Mov_Imm,
Neon_Mvn_Imm
};
+
+extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement,
+ bool &usesOnlyOneValue, bool &hasDominantValue,
+ bool &isConstant, bool &isUNDEF);
} // namespace llvm
#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 09451fd..34f917c 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -120,6 +120,14 @@ class A64InstRdnm<dag outs, dag ins, string asmstr,
let Inst{20-16} = Rm;
}
+class A64InstRtnm<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ let Inst{20-16} = Rm;
+}
+
//===----------------------------------------------------------------------===//
//
// Actual A64 Instruction Formats
@@ -383,6 +391,8 @@ class A64I_extract<bit sf, bits<3> op, bit n,
// Inherits Rd in 4-0
}
+let Predicates = [HasFPARMv8] in {
+
// Format for floating-point compare instructions.
class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
dag outs, dag ins, string asmstr,
@@ -562,6 +572,8 @@ class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
// Inherit Rd in 4-0
}
+}
+
// Format for load-register (literal) instructions.
class A64I_LDRlit<bits<2> opc, bit v,
dag outs, dag ins, string asmstr,
@@ -971,31 +983,123 @@ class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit> {
}
+// Format AdvSIMD bitwise extract
+class NeonI_BitExtract<bit q, bits<2> op2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-24} = 0b101110;
+ let Inst{23-22} = op2;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ // imm4 in 14-11
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD perm
+class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ let Inst{14-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD table lookup
+class NeonI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = op2;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ let Inst{14-13} = len;
+ let Inst{12} = op;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
// Format AdvSIMD 3 vector registers with same vector type
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin>
-{
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
let Inst{31} = 0b0;
let Inst{30} = q;
let Inst{29} = u;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
let Inst{21} = 0b1;
- // Inherit Rm in 20-16
+ // Inherit Rm in 20-16
let Inst{15-11} = opcode;
let Inst{10} = 0b1;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
+// Format AdvSIMD 3 vector registers with different vector type
+class NeonI_3VDiff<bit q, bit u, bits<2> size, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11} = 0b0;
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD two registers and an element
+class NeonI_2VElem<bit q, bit u, bits<2> size, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b01111;
+ let Inst{23-22} = size;
+ // l in Inst{21}
+ // m in Inst{20}
+ // Inherit Rm in 19-16
+ let Inst{15-12} = opcode;
+ // h in Inst{11}
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
// Format AdvSIMD 1 vector register with modified immediate
class NeonI_1VModImm<bit q, bit op,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
- : A64InstRd<outs,ins, asmstr, patterns, itin>
-{
+ : A64InstRd<outs,ins, asmstr, patterns, itin> {
bits<8> Imm;
bits<4> cmode;
let Inst{31} = 0b0;
@@ -1015,15 +1119,14 @@ class NeonI_1VModImm<bit q, bit op,
class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin>
-{
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
let Inst{31} = 0b0;
let Inst{30} = 0b1;
let Inst{29} = u;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
let Inst{21} = 0b1;
- // Inherit Rm in 20-16
+ // Inherit Rm in 20-16
let Inst{15-11} = opcode;
let Inst{10} = 0b1;
// Inherit Rn in 9-5
@@ -1035,6 +1138,98 @@ class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD 2 vector 1 immediate shift
+class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<7> Imm;
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-23} = 0b011110;
+ let Inst{22-16} = Imm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0b1;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD duplicate and insert
+class NeonI_copy<bit q, bit op, bits<4> imm4,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Imm5;
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = op;
+ let Inst{28-21} = 0b01110000;
+ let Inst{20-16} = Imm5;
+ let Inst{15} = 0b0;
+ let Inst{14-11} = imm4;
+ let Inst{10} = 0b1;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+// Format AdvSIMD insert from element to vector
+class NeonI_insert<bit q, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Imm5;
+ bits<4> Imm4;
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = op;
+ let Inst{28-21} = 0b01110000;
+ let Inst{20-16} = Imm5;
+ let Inst{15} = 0b0;
+ let Inst{14-11} = Imm4;
+ let Inst{10} = 0b1;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD scalar pairwise
+class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b11000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD 2 vector across lanes
+class NeonI_2VAcross<bit q, bit u, bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
: A64InstRdn<outs, ins, asmstr, patterns, itin>
{
let Inst{31} = 0b0;
@@ -1042,13 +1237,253 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
let Inst{29} = u;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
+ let Inst{21-17} = 0b11000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD scalar two registers miscellaneous
+class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode, dag outs, dag ins,
+ string asmstr, list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
let Inst{21-17} = 0b10000;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD vector load/store multiple N-element structure
+class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-23} = 0b0011000;
+ let Inst{22} = l;
+ let Inst{21-16} = 0b000000;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = size;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD vector load/store multiple N-element structure (post-index)
+class NeonI_LdStMult_Post<bit q, bit l, bits<4> opcode, bits<2> size,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-23} = 0b0011001;
+ let Inst{22} = l;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = size;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD vector load Single N-element structure to all lanes
+class NeonI_LdOne_Dup<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
+ dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-23} = 0b0011010;
+ let Inst{22} = 0b1;
+ let Inst{21} = r;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-13} = opcode;
+ let Inst{12} = 0b0;
+ let Inst{11-10} = size;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD vector load/store Single N-element structure to/from one lane
+class NeonI_LdStOne_Lane<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
+ dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin>
+{
+ bits<4> lane;
+ let Inst{31} = 0b0;
+ let Inst{29-23} = 0b0011010;
+ let Inst{22} = l;
+ let Inst{21} = r;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-14} = op2_1;
+ let Inst{13} = op0;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD post-index vector load Single N-element structure to all lanes
+class NeonI_LdOne_Dup_Post<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
+ dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRtnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-23} = 0b0011011;
+ let Inst{22} = 0b1;
+ let Inst{21} = r;
+ // Inherit Rm in 20-16
+ let Inst{15-13} = opcode;
+ let Inst{12} = 0b0;
+ let Inst{11-10} = size;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD post-index vector load/store Single N-element structure
+// to/from one lane
+class NeonI_LdStOne_Lane_Post<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
+ dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtnm<outs, ins, asmstr, patterns, itin>
+{
+ bits<4> lane;
+ let Inst{31} = 0b0;
+ let Inst{29-23} = 0b0011011;
+ let Inst{22} = l;
+ let Inst{21} = r;
+ // Inherit Rm in 20-16
+ let Inst{15-14} = op2_1;
+ let Inst{13} = op0;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD 3 scalar registers with different type
+
+class NeonI_Scalar3Diff<bit u, bits<2> size, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-30} = 0b01;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD scalar shift by immediate
+
+class NeonI_ScalarShiftImm<bit u, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<4> Imm4;
+ bits<3> Imm3;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = u;
+ let Inst{28-23} = 0b111110;
+ let Inst{22-19} = Imm4;
+ let Inst{18-16} = Imm3;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0b1;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD crypto AES
+class NeonI_Crypto_AES<bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-24} = 0b01001110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10100;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+// Format AdvSIMD crypto SHA
+class NeonI_Crypto_SHA<bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-24} = 0b01011110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10100;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
+// Format AdvSIMD crypto 3V SHA
+class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-24} = 0b01011110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ let Inst{14-12} = opcode;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD scalar x indexed element
+class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo,
+ bits<4> opcode, dag outs, dag ins,
+ string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin>
+{
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11111;
+ let Inst{23} = szhi;
+ let Inst{22} = szlo;
+ // l in Inst{21}
+ // m in Inst{20}
+ // Inherit Rm in 19-16
+ let Inst{15-12} = opcode;
+ // h in Inst{11}
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+// Format AdvSIMD scalar copy - insert from element to scalar
+class NeonI_ScalarCopy<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> {
+ let Inst{28} = 0b1;
+}
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index d8f45eb..180110a 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -29,7 +29,7 @@
#include <algorithm>
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
using namespace llvm;
@@ -68,43 +68,71 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
.addImm(A64SysReg::NZCV);
} else if (AArch64::GPR64RegClass.contains(DestReg)) {
- assert(AArch64::GPR64RegClass.contains(SrcReg));
- Opc = AArch64::ORRxxx_lsl;
- ZeroReg = AArch64::XZR;
+ if (AArch64::GPR64RegClass.contains(SrcReg)) {
+ Opc = AArch64::ORRxxx_lsl;
+ ZeroReg = AArch64::XZR;
+ } else {
+ assert(AArch64::FPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::GPR32RegClass.contains(DestReg)) {
- assert(AArch64::GPR32RegClass.contains(SrcReg));
- Opc = AArch64::ORRwww_lsl;
- ZeroReg = AArch64::WZR;
+ if (AArch64::GPR32RegClass.contains(SrcReg)) {
+ Opc = AArch64::ORRwww_lsl;
+ ZeroReg = AArch64::WZR;
+ } else {
+ assert(AArch64::FPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR32RegClass.contains(DestReg)) {
- assert(AArch64::FPR32RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
- .addReg(SrcReg);
- return;
+ if (AArch64::FPR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
+ else {
+ assert(AArch64::GPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR64RegClass.contains(DestReg)) {
- assert(AArch64::FPR64RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
- .addReg(SrcReg);
- return;
+ if (AArch64::FPR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
+ else {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR128RegClass.contains(DestReg)) {
assert(AArch64::FPR128RegClass.contains(SrcReg));
- // FIXME: there's no good way to do this, at least without NEON:
- // + There's no single move instruction for q-registers
- // + We can't create a spill slot and use normal STR/LDR because stack
- // allocation has already happened
- // + We can't go via X-registers with FMOV because register allocation has
- // already happened.
- // This may not be efficient, but at least it works.
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
- .addReg(SrcReg)
- .addReg(AArch64::XSP)
- .addImm(0x1ff & -16);
-
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
- .addReg(AArch64::XSP, RegState::Define)
- .addReg(AArch64::XSP)
- .addImm(16);
- return;
+ // If NEON is enabled, use ORR to implement this copy.
+ // If NEON isn't available, emit an STR/LDR pair through the stack instead.
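+ // (ORR Vd.16B, Vn.16B, Vn.16B with both sources equal is the canonical
+ // full-register vector move, also written as "mov Vd.16B, Vn.16B".)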
+ if (getSubTarget().hasNEON()) {
+ BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ return;
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ }
} else {
llvm_unreachable("Unknown register class in copyPhysReg");
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 07289b0..23d81fc 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
+ AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
@@ -125,6 +127,8 @@ def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+
//===----------------------------------------------------------------------===//
// Call sequence pseudo-instructions
//===----------------------------------------------------------------------===//
@@ -1274,7 +1278,7 @@ def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
-def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
+def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
sub_32)>;
//===-------------------------------
@@ -1978,6 +1982,13 @@ def fpz64 : Operand<f64>,
let DecoderMethod = "DecodeFPZeroOperand";
}
+def fpz64movi : Operand<i64>,
+ ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
+}
+
multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
(outs), ins, "fcmp\t$Rn, $Rm", [pattern],
@@ -2186,23 +2197,23 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
// Extra patterns for when we're allowed to optimise separate multiplication and
// addition.
-let Predicates = [UseFusedMAC] in {
-def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+let Predicates = [HasFPARMv8, UseFusedMAC] in {
+def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
+def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
(FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
+def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
+def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra),
+def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)),
+def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
}
@@ -2342,6 +2353,7 @@ defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
+let Predicates = [HasFPARMv8] in {
def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
@@ -2350,6 +2362,7 @@ def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
+}
multiclass A64I_inttofp<bit o0, string asmop> {
def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
@@ -2361,6 +2374,7 @@ multiclass A64I_inttofp<bit o0, string asmop> {
defm S : A64I_inttofp<0b0, "scvtf">;
defm U : A64I_inttofp<0b1, "ucvtf">;
+let Predicates = [HasFPARMv8] in {
def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
@@ -2369,16 +2383,19 @@ def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
+}
def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
+let Predicates = [HasFPARMv8] in {
def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
+}
def lane1_asmoperand : AsmOperandClass {
let Name = "Lane1";
@@ -2401,11 +2418,13 @@ let DecoderMethod = "DecodeFMOVLaneInstruction" in {
"fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
}
+let Predicates = [HasFPARMv8] in {
def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
(FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
(FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
+}
//===----------------------------------------------------------------------===//
// Floating-point immediate instructions
@@ -2499,11 +2518,15 @@ let mayLoad = 1 in {
def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
}
+let Predicates = [HasFPARMv8] in {
def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
+}
let mayLoad = 1 in {
+ let Predicates = [HasFPARMv8] in {
def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+ }
def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
@@ -3097,6 +3120,7 @@ defm LS32
defm LS64
: A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
+let Predicates = [HasFPARMv8] in {
// STR/LDR to/from a B register
defm LSFP8
: A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
@@ -3115,6 +3139,7 @@ defm LSFP64
defm LSFP128
: A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
qword_addrparams>;
+}
//===------------------------------
// 2.3 Signed loads
@@ -3570,10 +3595,13 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
+
+let Predicates = [HasFPARMv8] in {
defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
"LSFPPair128">;
+}
def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
@@ -5162,4 +5190,4 @@ defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
// Advanced SIMD (NEON) Support
//
-include "AArch64InstrNEON.td" \ No newline at end of file
+include "AArch64InstrNEON.td"
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 98b9e3e..d71749d 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -41,6 +41,37 @@ def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
+def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
+
+def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
+def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
+def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
+def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
+def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
+def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
+
+def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
+def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
+def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
+def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
+ [SDTCisVec<0>]>>;
+def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
+def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
+
+def SDT_assertext : SDTypeProfile<1, 1,
+ [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
+def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
+def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
+
//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//
@@ -48,8 +79,7 @@ def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
string asmop, SDPatternOperator opnode8B,
SDPatternOperator opnode16B,
- bit Commutable = 0>
-{
+ bit Commutable = 0> {
let isCommutable = Commutable in {
def _8B : NeonI_3VSame<0b0, u, size, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
@@ -70,8 +100,7 @@ multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+ bit Commutable = 0> {
let isCommutable = Commutable in {
def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
@@ -105,8 +134,7 @@ multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
bit Commutable = 0>
- : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable>
-{
+ : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
let isCommutable = Commutable in {
def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
@@ -127,8 +155,7 @@ multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
bit Commutable = 0>
- : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable>
-{
+ : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
let isCommutable = Commutable in {
def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
(outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
@@ -146,8 +173,7 @@ multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
SDPatternOperator opnode4S,
SDPatternOperator opnode2D,
ValueType ResTy2S, ValueType ResTy4S,
- ValueType ResTy2D, bit Commutable = 0>
-{
+ ValueType ResTy2D, bit Commutable = 0> {
let isCommutable = Commutable in {
def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
@@ -206,8 +232,8 @@ defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints.
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
- RegisterClass VPRC, ValueType OpTy, bit q, bit u, bits<2> size, bits<5> opcode,
- SDPatternOperator opnode>
+ RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
+ bits<5> opcode, SDPatternOperator opnode>
: NeonI_3VSame<q, u, size, opcode,
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
@@ -312,26 +338,24 @@ defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
// ORR disassembled as MOV if Vn==Vm
// Vector Move - register
-// Alias for ORR if Vn=Vm and it is the preferred syntax
+// Alias for ORR if Vn=Vm.
+// FIXME: This is actually the preferred syntax but TableGen can't deal with
+// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
- (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>;
+ (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
- (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>;
-
-def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
- ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
- ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
- unsigned EltBits;
- uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
- OpCmodeConstVal->getZExtValue(), EltBits);
- return (EltBits == 8 && EltVal == 0xff);
-}]>;
+ (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
+// The MOVI instruction takes two immediate operands. The first is the
+// immediate encoding, while the second is the cmode. A cmode of 14, or
+// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
+def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
+def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
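+// For example, with cmode = 14 the 8-bit immediate 255 is replicated into
+// every byte lane, so Neon_AllOne matches an all-ones vector and Neon_AllZero
+// (immediate 0) an all-zero vector.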
def Neon_not8B : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
+ (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
def Neon_not16B : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;
+ (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
(or node:$Rn, (Neon_not8B node:$Rm))>;
@@ -447,6 +471,9 @@ multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
(v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
+ (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
(v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
(INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
@@ -562,7 +589,7 @@ def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
class NeonI_compare_aliases<string asmop, string asmlane,
- Instruction inst, RegisterClass VPRC>
+ Instruction inst, RegisterOperand VPRC>
: NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
", $Rm" # asmlane,
(inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
@@ -1023,6 +1050,20 @@ defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
return (HasShift && !ShiftOnesIn);
}]>;
+def neon_uimm1_asmoperand : AsmOperandClass
+{
+ let Name = "UImm1";
+ let PredicateMethod = "isUImm<1>";
+ let RenderMethod = "addImmOperands";
+}
+
+def neon_uimm2_asmoperand : AsmOperandClass
+{
+ let Name = "UImm2";
+ let PredicateMethod = "isUImm<2>";
+ let RenderMethod = "addImmOperands";
+}
+
def neon_uimm8_asmoperand : AsmOperandClass
{
let Name = "UImm8";
@@ -1032,7 +1073,7 @@ def neon_uimm8_asmoperand : AsmOperandClass
def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm8_asmoperand;
- let PrintMethod = "printNeonUImm8Operand";
+ let PrintMethod = "printUImmHexOperand";
}
def neon_uimm64_mask_asmoperand : AsmOperandClass
@@ -1057,7 +1098,7 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (timm:$Imm),
(neon_mov_imm_LSL_operand:$Simm))))],
@@ -1070,7 +1111,7 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (timm:$Imm),
(neon_mov_imm_LSL_operand:$Simm))))],
@@ -1084,7 +1125,7 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
[(set (v4i16 VPR64:$Rd),
(v4i16 (opnode (timm:$Imm),
(neon_mov_imm_LSLH_operand:$Simm))))],
@@ -1097,7 +1138,7 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
[(set (v8i16 VPR128:$Rd),
(v8i16 (opnode (timm:$Imm),
(neon_mov_imm_LSLH_operand:$Simm))))],
@@ -1117,7 +1158,7 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins VPR64:$src, neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (v2i32 VPR64:$src),
(v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
@@ -1131,7 +1172,7 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins VPR128:$src, neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (v4i32 VPR128:$src),
(v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
@@ -1146,7 +1187,7 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins VPR64:$src, neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
[(set (v4i16 VPR64:$Rd),
(v4i16 (opnode (v4i16 VPR64:$src),
(v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
@@ -1160,7 +1201,7 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins VPR128:$src, neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
[(set (v8i16 VPR128:$Rd),
(v8i16 (opnode (v8i16 VPR128:$src),
(v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
@@ -1180,7 +1221,7 @@ multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_MSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (timm:$Imm),
(neon_mov_imm_MSL_operand:$Simm))))],
@@ -1193,7 +1234,7 @@ multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_MSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (timm:$Imm),
(neon_mov_imm_MSL_operand:$Simm))))],
@@ -1315,8 +1356,8 @@ defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
}
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
- Instruction inst, RegisterClass VPRC>
- : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
+ Instruction inst, RegisterOperand VPRC>
+ : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
(inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
// Aliases for Vector Move Immediate Shifted
@@ -1382,9 +1423,8 @@ let isReMaterializable = 1 in {
def MOVIdi : NeonI_1VModImm<0b0, 0b1,
(outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
"movi\t $Rd, $Imm",
- [(set (f64 FPR64:$Rd),
- (f64 (bitconvert
- (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
+ [(set (v1i64 FPR64:$Rd),
+ (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
NoItinerary> {
let cmode = 0b1110;
}
@@ -1392,7 +1432,7 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1,
// Vector Floating Point Move Immediate
-class NeonI_FMOV_impl<string asmlane, RegisterClass VPRC, ValueType OpTy,
+class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
Operand immOpType, bit q, bit op>
: NeonI_1VModImm<q, op,
(outs VPRC:$Rd), (ins immOpType:$Imm),
@@ -1409,49 +1449,3339 @@ def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
}
-// Scalar Arithmetic
+// Vector Shift (Immediate)
+// Immediate in [0, 63]
+def imm0_63 : Operand<i32> {
+ let ParserMatchClass = uimm6_asmoperand;
+}
-class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
- : NeonI_Scalar3Same<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
+// Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
+// as follows:
+//
+// Offset Encoding
+// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
+// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
+// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
+// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
+//
+// The shift right immediate amount, in the range 1 to element bits, is computed
+// as 2 * Offset - UInt(immh:immb). The shift left immediate amount, in the
+// range 0 to element bits - 1, is computed as UInt(immh:immb) - Offset.
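+//
+// A worked example (illustrative only, consistent with the formulas above):
+// for 16-bit elements Offset is 16, so a right shift by 3 is encoded as
+// immh:immb = 2 * 16 - 3 = 0b0011101 and decoded back as 32 - 29 = 3, while a
+// left shift by 3 is encoded as immh:immb = 16 + 3 = 0b0010011 and decoded as
+// 19 - 16 = 3.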
+
+class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
+ let Name = "ShrImm" # OFFSET;
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "ShrImm" # OFFSET;
+}
+
+class shr_imm<string OFFSET> : Operand<i32> {
+ let EncoderMethod = "getShiftRightImm" # OFFSET;
+ let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
+}
+
+def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
+def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
+def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
+def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
+
+def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
+def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
+def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
+def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
+
+class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
+ let Name = "ShlImm" # OFFSET;
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "ShlImm" # OFFSET;
+}
+
+class shl_imm<string OFFSET> : Operand<i32> {
+ let EncoderMethod = "getShiftLeftImm" # OFFSET;
+ let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
+}
+
+def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
+def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
+def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
+def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
+
+def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
+def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
+def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
+def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
+
+class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd),
+ (Ty (OpNode (Ty VPRC:$Rn),
+ (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
+ NoItinerary>;
+
+multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
+ // 64-bit vector types.
+ def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ }
+
+ def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ }
+
+ def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ }
+
+ // 128-bit vector types.
+ def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ }
+
+ def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ }
+
+ def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ }
+
+ def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ }
+}
+
+multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
+ def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift left
+defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
+
+// Shift right
+defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
+defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
+
+def Neon_High16B : PatFrag<(ops node:$in),
+ (extract_subvector (v16i8 node:$in), (iPTR 8))>;
+def Neon_High8H : PatFrag<(ops node:$in),
+ (extract_subvector (v8i16 node:$in), (iPTR 4))>;
+def Neon_High4S : PatFrag<(ops node:$in),
+ (extract_subvector (v4i32 node:$in), (iPTR 2))>;
+def Neon_High2D : PatFrag<(ops node:$in),
+ (extract_subvector (v2i64 node:$in), (iPTR 1))>;
+def Neon_High4float : PatFrag<(ops node:$in),
+ (extract_subvector (v4f32 node:$in), (iPTR 2))>;
+def Neon_High2double : PatFrag<(ops node:$in),
+ (extract_subvector (v2f64 node:$in), (iPTR 1))>;
+
+def Neon_Low16B : PatFrag<(ops node:$in),
+ (v8i8 (extract_subvector (v16i8 node:$in),
+ (iPTR 0)))>;
+def Neon_Low8H : PatFrag<(ops node:$in),
+ (v4i16 (extract_subvector (v8i16 node:$in),
+ (iPTR 0)))>;
+def Neon_Low4S : PatFrag<(ops node:$in),
+ (v2i32 (extract_subvector (v4i32 node:$in),
+ (iPTR 0)))>;
+def Neon_Low2D : PatFrag<(ops node:$in),
+ (v1i64 (extract_subvector (v2i64 node:$in),
+ (iPTR 0)))>;
+def Neon_Low4float : PatFrag<(ops node:$in),
+ (v2f32 (extract_subvector (v4f32 node:$in),
+ (iPTR 0)))>;
+def Neon_Low2double : PatFrag<(ops node:$in),
+ (v1f64 (extract_subvector (v2f64 node:$in),
+ (iPTR 0)))>;
+
+class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, ValueType DestTy, ValueType SrcTy,
+ Operand ImmTy, SDPatternOperator ExtOp>
+ : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+ (ins VPR64:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [(set (DestTy VPR128:$Rd),
+ (DestTy (shl
+ (DestTy (ExtOp (SrcTy VPR64:$Rn))),
+ (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
+ NoItinerary>;
+
+class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, ValueType DestTy, ValueType SrcTy,
+ int StartIndex, Operand ImmTy,
+ SDPatternOperator ExtOp, PatFrag getTop>
+ : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+ (ins VPR128:$Rn, ImmTy:$Imm),
+ asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [(set (DestTy VPR128:$Rd),
+ (DestTy (shl
+ (DestTy (ExtOp
+ (SrcTy (getTop VPR128:$Rn)))),
+ (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
+ NoItinerary>;
+
+multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
+ SDNode ExtOp> {
+ // 64-bit vector types.
+ def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
+ shl_imm8, ExtOp> {
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ }
+
+ def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
+ shl_imm16, ExtOp> {
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ }
+
+ def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
+ shl_imm32, ExtOp> {
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ }
+
+ // 128-bit vector types
+ def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
+ 8, shl_imm8, ExtOp, Neon_High16B> {
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ }
+
+ def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
+ 4, shl_imm16, ExtOp, Neon_High8H> {
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ }
+
+ def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
+ 2, shl_imm32, ExtOp, Neon_High4S> {
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ }
+
+ // Use other patterns to match when the immediate is 0.
+ def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
+
+ def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
+
+ def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
+
+ def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
+ (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
+
+ def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
+ (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
+
+ def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
+ (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
+}
+
+// Shift left long
+defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
+defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
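+
+// Note: a plain sext/zext of the low half is matched by the immediate-0
+// patterns in NeonI_N2VShLL above, which corresponds to the SXTL/UXTL aliases
+// of SSHLL/USHLL with a shift of #0.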
+
+// Rounding/Saturating shift
+class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
+ (i32 ImmTy:$Imm))))],
+ NoItinerary>;
+
+// shift right (vector by immediate)
+multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ // 64-bit vector types.
+ def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types.
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Rounding shift right
+defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
+ int_aarch64_neon_vsrshr>;
+defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
+ int_aarch64_neon_vurshr>;
+
+// Saturating shift left unsigned
+defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
+
+// Saturating shift left
+defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
+defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
+
+class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
+ SDNode OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
+ (Ty (OpNode (Ty VPRC:$Rn),
+ (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// Shift Right accumulate
+multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
+ def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift right and accumulate
+defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
+defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
+
+// Rounding shift accumulate
+class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
+ (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Rounding shift right and accumulate
+defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
+defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
+
+// Shift insert by immediate
+class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
+ (i32 ImmTy:$Imm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// shift left insert (vector by immediate)
+multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
+ def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
+ int_aarch64_neon_vsli> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
+ int_aarch64_neon_vsli> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
+ int_aarch64_neon_vsli> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types
+ def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
+ int_aarch64_neon_vsli> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
+ int_aarch64_neon_vsli> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
+ int_aarch64_neon_vsli> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
+ int_aarch64_neon_vsli> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// shift right insert (vector by immediate)
+multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
+ // 64-bit vector types.
+ def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ int_aarch64_neon_vsri> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ int_aarch64_neon_vsri> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ int_aarch64_neon_vsri> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types
+ def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ int_aarch64_neon_vsri> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ int_aarch64_neon_vsri> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ int_aarch64_neon_vsri> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ int_aarch64_neon_vsri> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift left and insert
+defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
+
+// Shift right and insert
+defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
+
+class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, Operand ImmTy>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [], NoItinerary>;
+
+class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, Operand ImmTy>
+ : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+ (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [], NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// shift right narrow (vector by immediate)
+multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
+ def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // Shift Narrow High
+ def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
+ shr_imm8> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
+ shr_imm16> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
+ shr_imm32> {
+ let Inst{22-21} = 0b01;
+ }
+}
+
+// Shift right narrow
+defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
+
+// Shift right narrow (prefix Q is saturating, prefix R is rounding)
+defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
+defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
+defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
+defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
+defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
+defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
+defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
+
+def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
+ (v2i64 (concat_vectors (v1i64 node:$Rm),
+ (v1i64 node:$Rn)))>;
+def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
+ (v8i16 (concat_vectors (v4i16 node:$Rm),
+ (v4i16 node:$Rn)))>;
+def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
+ (v4i32 (concat_vectors (v2i32 node:$Rm),
+ (v2i32 node:$Rn)))>;
+def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
+ (v4f32 (concat_vectors (v2f32 node:$Rm),
+ (v2f32 node:$Rn)))>;
+def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
+ (v2f64 (concat_vectors (v1f64 node:$Rm),
+ (v1f64 node:$Rn)))>;
+
+def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
+ (v8i16 (srl (v8i16 node:$lhs),
+ (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
+ (v4i32 (srl (v4i32 node:$lhs),
+ (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
+ (v2i64 (srl (v2i64 node:$lhs),
+ (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
+ (v8i16 (sra (v8i16 node:$lhs),
+ (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
+ (v4i32 (sra (v4i32 node:$lhs),
+ (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
+ (v2i64 (sra (v2i64 node:$lhs),
+ (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
+
+// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
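+// For instance (a sketch of what the first pattern below matches): a v8i16
+// lshr by a splatted immediate followed by a trunc to v8i8, i.e.
+//   (v8i8 (trunc (srl (v8i16 $Rn), (Neon_vdup 3)))),
+// is selected to (SHRNvvi_8B $Rn, 3).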
+multiclass Neon_shiftNarrow_patterns<string shr> {
+ def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
+ (i32 shr_imm8:$Imm)))),
+ (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
+ (i32 shr_imm16:$Imm)))),
+ (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
+ (i32 shr_imm32:$Imm)))),
+ (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
+
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
+ VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
+ (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+ VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
+ VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
+ (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
+ VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
+ (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
+ def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
+ (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
+ (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
+ (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
+
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v8i8
+ (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
+ (!cast<Instruction>(prefix # "_16B")
+ (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v4i16
+ (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
+ (!cast<Instruction>(prefix # "_8H")
+ (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v2i32
+ (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
+ (!cast<Instruction>(prefix # "_4S")
+ (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+}
+
+defm : Neon_shiftNarrow_patterns<"lshr">;
+defm : Neon_shiftNarrow_patterns<"ashr">;
+
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
+
+// Convert between fixed-point and floating-point
+class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
+ Operand ImmTy, SDPatternOperator IntOp>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
+ (i32 ImmTy:$Imm))))],
+ NoItinerary>;
+
+multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator IntOp> {
+ def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
+ shr_imm64, IntOp> {
+ let Inst{22} = 0b1;
+ }
+}
+
+multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator IntOp> {
+ def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
+ shr_imm64, IntOp> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Convert fixed-point to floating-point
+defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
+ int_arm_neon_vcvtfxs2fp>;
+defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
+ int_arm_neon_vcvtfxu2fp>;
+
+// Convert floating-point to fixed-point
+defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
+ int_arm_neon_vcvtfp2fxs>;
+defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
+ int_arm_neon_vcvtfp2fxu>;
+
+multiclass Neon_sshll2_0<SDNode ext>
+{
+ def _v8i8 : PatFrag<(ops node:$Rn),
+ (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
+ def _v4i16 : PatFrag<(ops node:$Rn),
+ (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
+ def _v2i32 : PatFrag<(ops node:$Rn),
+ (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
+}
+
+defm NI_sext_high : Neon_sshll2_0<sext>;
+defm NI_zext_high : Neon_sshll2_0<zext>;
+
+
+//===----------------------------------------------------------------------===//
+// Multiclasses for NeonI_Across
+//===----------------------------------------------------------------------===//
+
+// Variant 1
+
+multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+{
+ def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
+ (outs FPR16:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd, $Rn.8b",
+ [(set (v1i16 FPR16:$Rd),
+ (v1i16 (opnode (v8i8 VPR64:$Rn))))],
NoItinerary>;
-multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
- string asmop, bit Commutable = 0>
+ def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
+ (outs FPR16:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.16b",
+ [(set (v1i16 FPR16:$Rd),
+ (v1i16 (opnode (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
+ (outs FPR32:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd, $Rn.4h",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
+ (outs FPR32:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.8h",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ // _1d2s doesn't exist!
+
+ def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
+ (outs FPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.4s",
+ [(set (v1i64 FPR64:$Rd),
+ (v1i64 (opnode (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+}
+
+defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
+defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
+
+// Variant 2
+
+multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
{
+ def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
+ (outs FPR8:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd, $Rn.8b",
+ [(set (v1i8 FPR8:$Rd),
+ (v1i8 (opnode (v8i8 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
+ (outs FPR8:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.16b",
+ [(set (v1i8 FPR8:$Rd),
+ (v1i8 (opnode (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
+ (outs FPR16:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd, $Rn.4h",
+ [(set (v1i16 FPR16:$Rd),
+ (v1i16 (opnode (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
+ (outs FPR16:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.8h",
+ [(set (v1i16 FPR16:$Rd),
+ (v1i16 (opnode (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ // _1s2s doesn't exist!
+
+ def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
+ (outs FPR32:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.4s",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+}
+
+defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
+defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
+
+defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
+defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
+
+defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
+
+// Variant 3
+
+multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
+ string asmop, SDPatternOperator opnode> {
+ def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
+ (outs FPR32:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.4s",
+ [(set (v1f32 FPR32:$Rd),
+ (v1f32 (opnode (v4f32 VPR128:$Rn))))],
+ NoItinerary>;
+}
+
+defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
+ int_aarch64_neon_vmaxnmv>;
+defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
+ int_aarch64_neon_vminnmv>;
+
+defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
+ int_aarch64_neon_vmaxv>;
+defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
+ int_aarch64_neon_vminv>;
+
+// The following definitions are for instruction class (Perm)
+
+class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
+ string asmop, RegisterOperand OpVPR, string OpS,
+ SDPatternOperator opnode, ValueType Ty>
+ : NeonI_Perm<q, size, opcode,
+ (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (Ty OpVPR:$Rd),
+ (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
+ NoItinerary>;
+
+multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
+ VPR64, "8b", opnode, v8i8>;
+ def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
+ VPR128, "16b",opnode, v16i8>;
+ def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
+ VPR64, "4h", opnode, v4i16>;
+ def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
+ VPR128, "8h", opnode, v8i16>;
+ def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
+ VPR64, "2s", opnode, v2i32>;
+ def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
+ VPR128, "4s", opnode, v4i32>;
+ def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
+ VPR128, "2d", opnode, v2i64>;
+}
+
+defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
+defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
+defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
+defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
+defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
+defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
+
+multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
+ def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
+ (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
+
+ def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
+ (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
+
+ def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
+ (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
+}
+
+defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
+defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
+defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
+defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
+defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
+defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
+
+// The following definitions are for instruction class (3V Diff)
+
+// normal long/long2 pattern
+class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode, SDPatternOperator ext,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
+ (ResTy (ext (OpTy OpVPR:$Rm))))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
+ string asmop, SDPatternOperator opnode,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, sext, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, sext, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, sext, VPR64, v2i64, v2i32>;
+ }
+}
+
+multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
+ }
+}
+
+multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, zext, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, zext, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, zext, VPR64, v2i64, v2i32>;
+ }
+}
+
+multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
+ }
+}
+
+defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
+defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
+
+defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
+defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
+
+defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
+defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
+
+defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
+defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
+
+// normal wide/wide2 pattern
+class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode, SDPatternOperator ext,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode (ResTy VPR128:$Rn),
+ (ResTy (ext (OpTy OpVPR:$Rm))))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, sext, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, sext, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, sext, VPR64, v2i64, v2i32>;
+}
+
+defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
+defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
+
+multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
+}
+
+defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
+defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
+
+multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, zext, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, zext, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, zext, VPR64, v2i64, v2i32>;
+}
+
+defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
+defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
+
+multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
+}
+
+defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
+defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
+
+// Get the high half of each vector element.
+multiclass NeonI_get_high {
+ def _8h : PatFrag<(ops node:$Rn),
+ (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
+ (v8i16 (Neon_vdup (i32 8)))))))>;
+ def _4s : PatFrag<(ops node:$Rn),
+ (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
+ (v4i32 (Neon_vdup (i32 16)))))))>;
+ def _2d : PatFrag<(ops node:$Rn),
+ (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
+ (v2i64 (Neon_vdup (i32 32)))))))>;
+}
+
+defm NI_get_hi : NeonI_get_high;
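+
+// For example, NI_get_hi_8h shifts each 16-bit element right by 8 and then
+// truncates it to 8 bits, so an element holding 0xABCD yields 0xAB, the high
+// half of the original element.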
+
+// pattern for addhn/subhn with 2 operands
+class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode, SDPatternOperator get_hi,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR64:$Rd),
+ (ResTy (get_hi
+ (OpTy (opnode (OpTy VPR128:$Rn),
+ (OpTy VPR128:$Rm))))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
+ opnode, NI_get_hi_8h, v8i8, v8i16>;
+ def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
+ opnode, NI_get_hi_4s, v4i16, v4i32>;
+ def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
+ opnode, NI_get_hi_2d, v2i32, v2i64>;
+ }
+}
+
+defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
+defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
+
+// pattern for operation with 2 operands
+class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy ResVPR:$Rd),
+ (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
+ NoItinerary>;
+
+// normal narrow pattern
+multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
+ opnode, VPR64, VPR128, v8i8, v8i16>;
+ def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
+ opnode, VPR64, VPR128, v4i16, v4i32>;
+ def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
+ opnode, VPR64, VPR128, v2i32, v2i64>;
+ }
+}
+
+defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
+defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
+
+// pattern for ACLE intrinsic with 3 operands
+class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [], NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let neverHasSideEffects = 1;
+}
+
+multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
+ def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
+ def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
+ def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
+}
+
+defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
+defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
+
+defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
+defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
+
+// Patterns have to be separate because there's a SUBREG_TO_REG in the output
+// part.
+class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
+ SDPatternOperator coreop>
+ : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
+ (SrcTy VPR128:$Rm)))))),
+ (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, VPR128:$Rm)>;
+
+// addhn2 patterns
+def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
+ BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
+ BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
+ BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
+
+// subhn2 patterns
+def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
+ BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
+ BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
+ BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
+
+// raddhn2 patterns
+def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
+def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
+def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
+
+// rsubhn2 patterns
+def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
+def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
+def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
+
+// patterns that need to extend the result
+class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType OpSTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
+ (OpTy OpVPR:$Rm))))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, VPR64, v8i16, v8i8, v8i8>;
+ def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, VPR64, v4i32, v4i16, v4i16>;
+ def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, VPR64, v2i64, v2i32, v2i32>;
+ }
+}
+
+defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
+defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
+
+multiclass NeonI_Op_High<SDPatternOperator op> {
+ def _16B : PatFrag<(ops node:$Rn, node:$Rm),
+ (op (v8i8 (Neon_High16B node:$Rn)),
+ (v8i8 (Neon_High16B node:$Rm)))>;
+ def _8H : PatFrag<(ops node:$Rn, node:$Rm),
+ (op (v4i16 (Neon_High8H node:$Rn)),
+ (v4i16 (Neon_High8H node:$Rm)))>;
+ def _4S : PatFrag<(ops node:$Rn, node:$Rm),
+ (op (v2i32 (Neon_High4S node:$Rn)),
+ (v2i32 (Neon_High4S node:$Rm)))>;
+}
+
+defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
+defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
+defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
+defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
+defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
+defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
+
+multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ !cast<PatFrag>(opnode # "_16B"),
+ VPR128, v8i16, v16i8, v8i8>;
+ def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ !cast<PatFrag>(opnode # "_8H"),
+ VPR128, v4i32, v8i16, v4i16>;
+ def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ !cast<PatFrag>(opnode # "_4S"),
+ VPR128, v2i64, v4i32, v2i32>;
+ }
+}
+
+defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
+defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
+
+// For patterns that need two operators chained together.
+class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode, SDPatternOperator subop,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType OpSTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode
+ (ResTy VPR128:$src),
+ (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
+ (OpTy OpVPR:$Rm))))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, SDPatternOperator subop>{
+ def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, subop, VPR64, v8i16, v8i8, v8i8>;
+ def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, subop, VPR64, v4i32, v4i16, v4i16>;
+ def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, subop, VPR64, v2i64, v2i32, v2i32>;
+}
+
+defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
+ add, int_arm_neon_vabds>;
+defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
+ add, int_arm_neon_vabdu>;
+
+multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, string subop> {
+ def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, !cast<PatFrag>(subop # "_16B"),
+ VPR128, v8i16, v16i8, v8i8>;
+ def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, !cast<PatFrag>(subop # "_8H"),
+ VPR128, v4i32, v8i16, v4i16>;
+ def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, !cast<PatFrag>(subop # "_4S"),
+ VPR128, v2i64, v4i32, v2i32>;
+}
+
+defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
+ "NI_sabdl_hi">;
+defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
+ "NI_uabdl_hi">;
+
+// Long pattern with 2 operands
+multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, VPR128, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, VPR128, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, VPR128, VPR64, v2i64, v2i32>;
+ }
+}
+
+defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
+defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
+
+class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
- def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
- (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
+ def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ !cast<PatFrag>(opnode # "_16B"),
+ v8i16, v16i8>;
+ def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ !cast<PatFrag>(opnode # "_8H"),
+ v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ !cast<PatFrag>(opnode # "_4S"),
+ v2i64, v4i32>;
+ }
+}
+
+defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
+ "NI_smull_hi", 1>;
+defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
+ "NI_umull_hi", 1>;
+
+// Long pattern with 3 operands
+class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode
+ (ResTy VPR128:$src),
+ (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, v2i64, v2i32>;
+}
+
+def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
+ (add node:$Rd,
+ (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
+
+def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
+ (add node:$Rd,
+ (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
+
+def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
+ (sub node:$Rd,
+ (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
+
+def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
+ (sub node:$Rd,
+ (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
+
+defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
+defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
+
+defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
+defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
+
+class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator subop, SDPatternOperator opnode,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (subop
+ (ResTy VPR128:$src),
+ (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator subop, string opnode> {
+ def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ subop, !cast<PatFrag>(opnode # "_16B"),
+ VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ subop, !cast<PatFrag>(opnode # "_8H"),
+ VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ subop, !cast<PatFrag>(opnode # "_4S"),
+ VPR128, v2i64, v4i32>;
+}
+
+defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
+ add, "NI_smull_hi">;
+defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
+ add, "NI_umull_hi">;
+
+defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
+ sub, "NI_smull_hi">;
+defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
+ sub, "NI_umull_hi">;
+
+multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, int_arm_neon_vqdmull,
+ VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, int_arm_neon_vqdmull,
+ VPR64, v2i64, v2i32>;
+}
+
+defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
+ int_arm_neon_vqadds>;
+defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
+ int_arm_neon_vqsubs>;
+
+multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, VPR128, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, VPR128, VPR64, v2i64, v2i32>;
+ }
+}
+
+defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
+ int_arm_neon_vqdmull, 1>;
+
+multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ !cast<PatFrag>(opnode # "_8H"),
+ v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ !cast<PatFrag>(opnode # "_4S"),
+ v2i64, v4i32>;
+ }
+}
+
+defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
+ "NI_qdmull_hi", 1>;
+
+multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_qdmull_hi_8H,
+ VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_qdmull_hi_4S,
+ VPR128, v2i64, v4i32>;
+}
+
+defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
+ int_arm_neon_vqadds>;
+defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
+ int_arm_neon_vqsubs>;
+
+multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, VPR128, VPR64, v8i16, v8i8>;
+
+ def _1q1d : NeonI_3VDiff<0b0, u, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd.1q, $Rn.1d, $Rm.1d",
+ [], NoItinerary>;
+ }
+}
+
+defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
+
+multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ !cast<PatFrag>(opnode # "_16B"),
+ v8i16, v16i8>;
+
+ def _1q2d : NeonI_3VDiff<0b1, u, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
+ [], NoItinerary>;
+ }
+}
+
+defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
+ 1>;
+
+// End of implementation for instruction class (3V Diff)
+
+// The following are the vector load/store multiple N-element structure
+// instructions (class SIMD lselem).
+
+// ld1: load multiple 1-element structures to 1/2/3/4 registers.
+// ld2/ld3/ld4: load multiple N-element structures to N registers (N = 2, 3, 4).
+// The structure consists of a sequence of sets of N values.
+// The first element of the structure is placed in the first lane
+// of the first vector, the second element in the first lane
+// of the second vector, and so on.
+// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
+// the list of three 64-bit vectors {BA, DC, FE}.
+// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the list of
+// three 64-bit vectors {DA, EB, FC}.
+// The store instructions write multiple structures from N registers to memory,
+// mirroring the loads.
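+//
+// Illustrative sketch (not part of this patch): at the ACLE level the LD3_2S
+// example above corresponds roughly to
+//   int32x2x3_t v = vld3_s32(p);  // v.val[0] = {A, D}, v.val[1] = {B, E}, v.val[2] = {C, F}
+// i.e. consecutive memory elements are de-interleaved across the N registers.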
+
+
+class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, string asmop>
+ : NeonI_LdStMult<q, 1, opcode, size,
+ (outs VecList:$Rt), (ins GPR64xsp:$Rn),
+ asmop # "\t$Rt, [$Rn]",
+ [],
+ NoItinerary> {
+ let mayLoad = 1;
+ let neverHasSideEffects = 1;
+}
+
+multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
+ def _8B : NeonI_LDVList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"), asmop>;
+
+ def _4H : NeonI_LDVList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"), asmop>;
+
+ def _2S : NeonI_LDVList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"), asmop>;
+
+ def _16B : NeonI_LDVList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"), asmop>;
+
+ def _8H : NeonI_LDVList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"), asmop>;
+
+ def _4S : NeonI_LDVList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"), asmop>;
+
+ def _2D : NeonI_LDVList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"), asmop>;
+}
+
+// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
+defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
+def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
+
+defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
+
+defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
+
+defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
+
+// Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
+defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
+def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
+
+defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
+def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
+
+defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
+def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
+
+class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, string asmop>
+ : NeonI_LdStMult<q, 0, opcode, size,
+ (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
+ asmop # "\t$Rt, [$Rn]",
+ [],
+ NoItinerary> {
+ let mayStore = 1;
+ let neverHasSideEffects = 1;
+}
+
+multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
+ def _8B : NeonI_STVList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"), asmop>;
+
+ def _4H : NeonI_STVList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"), asmop>;
+
+ def _2S : NeonI_STVList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"), asmop>;
+
+ def _16B : NeonI_STVList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"), asmop>;
+
+ def _8H : NeonI_STVList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"), asmop>;
+
+ def _4S : NeonI_STVList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"), asmop>;
+
+ def _2D : NeonI_STVList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"), asmop>;
+}
+
+// Store multiple N-element structures from N registers (N = 1,2,3,4)
+defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
+def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
+
+defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
+
+defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
+
+defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
+
+// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
+defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
+def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
+
+defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
+def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
+
+defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
+def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
+
+def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
+def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
+
+def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
+def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
+
+def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
+def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
+
+def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
+def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
+
+def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
+def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
+
+def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
+def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
+
+def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
+ (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
+ (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
+ (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
+ (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
+ (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
+ (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
+ (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
+ (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
+
+def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
+ (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
+ (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
+
+def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
+ (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
+ (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
+
+// End of vector load/store multiple N-element structure(class SIMD lselem)
+
+// The following are the post-index vector load/store multiple N-element
+// structure instructions (class SIMD lselem-post).
+// For the immediate (fixed-offset) forms, the post-increment must equal the
+// total number of bytes transferred; the uimm_exactN operands below encode
+// exactly those values.
+def exact1_asmoperand : AsmOperandClass {
+ let Name = "Exact1";
+ let PredicateMethod = "isExactImm<1>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
+ let ParserMatchClass = exact1_asmoperand;
+}
+
+def exact2_asmoperand : AsmOperandClass {
+ let Name = "Exact2";
+ let PredicateMethod = "isExactImm<2>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
+ let ParserMatchClass = exact2_asmoperand;
+}
+
+def exact3_asmoperand : AsmOperandClass {
+ let Name = "Exact3";
+ let PredicateMethod = "isExactImm<3>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
+ let ParserMatchClass = exact3_asmoperand;
+}
+
+def exact4_asmoperand : AsmOperandClass {
+ let Name = "Exact4";
+ let PredicateMethod = "isExactImm<4>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
+ let ParserMatchClass = exact4_asmoperand;
+}
+
+def exact6_asmoperand : AsmOperandClass {
+ let Name = "Exact6";
+ let PredicateMethod = "isExactImm<6>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
+ let ParserMatchClass = exact6_asmoperand;
+}
+
+def exact8_asmoperand : AsmOperandClass {
+ let Name = "Exact8";
+ let PredicateMethod = "isExactImm<8>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
+ let ParserMatchClass = exact8_asmoperand;
+}
+
+def exact12_asmoperand : AsmOperandClass {
+ let Name = "Exact12";
+ let PredicateMethod = "isExactImm<12>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
+ let ParserMatchClass = exact12_asmoperand;
+}
+
+def exact16_asmoperand : AsmOperandClass {
+ let Name = "Exact16";
+ let PredicateMethod = "isExactImm<16>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
+ let ParserMatchClass = exact16_asmoperand;
+}
+
+def exact24_asmoperand : AsmOperandClass {
+ let Name = "Exact24";
+ let PredicateMethod = "isExactImm<24>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
+ let ParserMatchClass = exact24_asmoperand;
+}
+
+def exact32_asmoperand : AsmOperandClass {
+ let Name = "Exact32";
+ let PredicateMethod = "isExactImm<32>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
+ let ParserMatchClass = exact32_asmoperand;
+}
+
+def exact48_asmoperand : AsmOperandClass {
+ let Name = "Exact48";
+ let PredicateMethod = "isExactImm<48>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
+ let ParserMatchClass = exact48_asmoperand;
+}
+
+def exact64_asmoperand : AsmOperandClass {
+ let Name = "Exact64";
+ let PredicateMethod = "isExactImm<64>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
+ let ParserMatchClass = exact64_asmoperand;
+}
+
+multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, Operand ImmTy,
+ string asmop> {
+ let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
+ DecoderMethod = "DecodeVLDSTPostInstruction" in {
+ def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt),
+ asmop # "\t$Rt, [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
+ asmop # "\t$Rt, [$Rn], $Rm",
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
+ Operand ImmTy2, string asmop> {
+ defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"),
+ ImmTy, asmop>;
+
+ defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"),
+ ImmTy, asmop>;
+
+ defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"),
+ ImmTy, asmop>;
+
+ defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"),
+ ImmTy2, asmop>;
+
+ defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"),
+ ImmTy2, asmop>;
+
+ defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"),
+ ImmTy2, asmop>;
+
+ defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"),
+ ImmTy2, asmop>;
+}
+
+// Post-index load multiple N-element structures to N consecutive registers (N = 1,2,3,4)
+defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
+defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
+ "ld1">;
+
+defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
+
+defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+ "ld3">;
+
+defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
+
+// Post-index load multiple 1-element structures to N consecutive registers
+// (N = 2,3,4)
+defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+ "ld1">;
+defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+ uimm_exact16, "ld1">;
+
+defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+ "ld1">;
+defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+ uimm_exact24, "ld1">;
+
+defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+ "ld1">;
+defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+ uimm_exact32, "ld1">;
+
+multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, Operand ImmTy,
+ string asmop> {
+ let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
+ DecoderMethod = "DecodeVLDSTPostInstruction" in {
+ def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
+ asmop # "\t$Rt, [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
+ asmop # "\t$Rt, [$Rn], $Rm",
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
+ Operand ImmTy2, string asmop> {
+ defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
+
+ defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"),
+ ImmTy, asmop>;
+
+ defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"),
+ ImmTy, asmop>;
+
+ defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"),
+ ImmTy2, asmop>;
+
+ defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"),
+ ImmTy2, asmop>;
+
+ defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"),
+ ImmTy2, asmop>;
+
+ defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"),
+ ImmTy2, asmop>;
+}
+
+// Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
+defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
+defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
+ "st1">;
+
+defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
+
+defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+ "st3">;
+
+defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
+
+// Post-index store multiple 1-element structures from N consecutive registers
+// (N = 2,3,4)
+defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+ "st1">;
+defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+ uimm_exact16, "st1">;
+
+defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+ "st1">;
+defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+ uimm_exact24, "st1">;
+
+defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+ "st1">;
+defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+ uimm_exact32, "st1">;
+
+// End of post-index vector load/store multiple N-element structure
+// (class SIMD lselem-post)
+
+// The following are the vector load/store single N-element structure
+// instructions (class SIMD lsone).
+def neon_uimm0_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm == 0;}]> {
+ let ParserMatchClass = neon_uimm0_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm1_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 2;}]> {
+ let ParserMatchClass = neon_uimm1_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm2_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 4;}]> {
+ let ParserMatchClass = neon_uimm2_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm3_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 8;}]> {
+ let ParserMatchClass = uimm3_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm4_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 16;}]> {
+ let ParserMatchClass = uimm4_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
+ RegisterOperand VecList, string asmop>
+ : NeonI_LdOne_Dup<q, r, opcode, size,
+ (outs VecList:$Rt), (ins GPR64xsp:$Rn),
+ asmop # "\t$Rt, [$Rn]",
+ [],
+ NoItinerary> {
+ let mayLoad = 1;
+ let neverHasSideEffects = 1;
+}
+
+multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
+ def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"), asmop>;
+
+ def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"), asmop>;
+
+ def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"), asmop>;
+
+ def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
+ !cast<RegisterOperand>(List # "1D_operand"), asmop>;
+
+ def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"), asmop>;
+
+ def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"), asmop>;
+
+ def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"), asmop>;
+
+ def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"), asmop>;
+}
+
+// Load single 1-element structure to all lanes of 1 register
+defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
+
+// Load single N-element structure to all lanes of N consecutive
+// registers (N = 2,3,4)
+defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
+defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
+defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
+
+
+class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
+ Instruction INST>
+ : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
+ (VTy (INST GPR64xsp:$Rn))>;
+
+// Match all LD1R instructions
+def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
+
+def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
+
+def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
+
+def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
+
+def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
+def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
+
+def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
+def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
+
+def : LD1R_pattern<v1i64, i64, load, LD1R_1D>;
+def : LD1R_pattern<v1f64, f64, load, LD1R_1D>;
+
+def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
+def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
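+
+// Illustrative sketch (not part of this patch): the LD1R patterns above target
+// the "duplicate a loaded scalar to all lanes" idiom, roughly
+//   float32x4_t v = vdupq_n_f32(*p);  // intended to select as: ld1r {v0.4s}, [x0]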
+
+
+multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
+ RegisterClass RegList> {
+ defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
+ defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
+ defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
+ defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
+}
+
+// Special vector list operands of 128-bit vectors with a bare layout,
+// i.e. printed only as ".b", ".h", ".s", ".d".
+defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
+defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
+defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
+defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
+
+class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane<1, r, op2_1, op0,
+ (outs VList:$Rt),
+ (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn]",
+ [],
+ NoItinerary> {
+ let mayLoad = 1;
+ let neverHasSideEffects = 1;
+ let hasExtraDefRegAllocReq = 1;
+ let Constraints = "$src = $Rt";
+}
+
+multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
+ def _B : NeonI_LDN_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _H : NeonI_LDN_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _S : NeonI_LDN_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _D : NeonI_LDN_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+}
+
+// Load single 1-element structure to one lane of 1 register.
+defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
+
+// Load single N-element structure to one lane of N consecutive registers
+// (N = 2,3,4)
+defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
+defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
+defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
+
+multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
+ Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
+ Instruction INST> {
+ def : Pat<(VTy (vector_insert (VTy VPR64:$src),
+ (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
+ (VTy (EXTRACT_SUBREG
+ (INST GPR64xsp:$Rn,
+ (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ ImmOp:$lane),
+ sub_64))>;
+
+ def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
+ (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
+ (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
+}
+
+// Match all LD1LN instructions
+defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
+ extloadi8, LD1LN_B>;
+
+defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
+ extloadi16, LD1LN_H>;
+
+defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
+ load, LD1LN_S>;
+defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
+ load, LD1LN_S>;
+
+defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
+ load, LD1LN_D>;
+defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
+ load, LD1LN_D>;
+
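+// Illustrative sketch (not part of this patch): the LD1LN patterns above cover
+// a lane insert fed by a load, roughly
+//   float32x4_t v2 = vld1q_lane_f32(p, v, 1);  // intended to select as: ld1 {v0.s}[1], [x0]
+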
+class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane<0, r, op2_1, op0,
+ (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn]",
+ [],
+ NoItinerary> {
+ let mayStore = 1;
+ let neverHasSideEffects = 1;
+ let hasExtraDefRegAllocReq = 1;
+}
+
+multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
+ def _B : NeonI_STN_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _H : NeonI_STN_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _S : NeonI_STN_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _D : NeonI_STN_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ neon_uimm1_bare, asmop>{
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+}
+
+// Store single 1-element structure from one lane of 1 register.
+defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;
+
+// Store single N-element structure from one lane of N consecutive registers
+// (N = 2,3,4)
+defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
+defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
+defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
+
+multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
+ Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
+ Instruction INST> {
+ def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
+ GPR64xsp:$Rn),
+ (INST GPR64xsp:$Rn,
+ (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
+ ImmOp:$lane)>;
+
+ def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
+ GPR64xsp:$Rn),
+ (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
+}
+
+// Match all ST1LN instructions
+defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
+ truncstorei8, ST1LN_B>;
+
+defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
+ truncstorei16, ST1LN_H>;
+
+defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
+ store, ST1LN_S>;
+defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
+ store, ST1LN_S>;
+
+defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
+ store, ST1LN_D>;
+defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
+ store, ST1LN_D>;
+
+// End of vector load/store single N-element structure (class SIMD lsone).
+
+
+// The following are post-index load/store single N-element instructions
+// (class SIMD lsone-post)
+
+multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
+ RegisterOperand VecList, Operand ImmTy,
+ string asmop> {
+ let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
+ DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
+ def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt),
+ asmop # "\t$Rt, [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
+ asmop # "\t$Rt, [$Rn], $Rm",
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
+ Operand uimm_b, Operand uimm_h,
+ Operand uimm_s, Operand uimm_d> {
+ defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"),
+ uimm_b, asmop>;
+
+ defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"),
+ uimm_h, asmop>;
+
+ defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"),
+ uimm_s, asmop>;
+
+ defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
+ !cast<RegisterOperand>(List # "1D_operand"),
+ uimm_d, asmop>;
+
+ defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"),
+ uimm_b, asmop>;
+
+ defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"),
+ uimm_h, asmop>;
+
+ defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"),
+ uimm_s, asmop>;
+
+ defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"),
+ uimm_d, asmop>;
+}
+
+// Post-index load single 1-element structure to all lanes of 1 register
+defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
+ uimm_exact2, uimm_exact4, uimm_exact8>;
+
+// Post-index load single N-element structure to all lanes of N consecutive
+// registers (N = 2,3,4)
+defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
+ uimm_exact4, uimm_exact8, uimm_exact16>;
+defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
+ uimm_exact6, uimm_exact12, uimm_exact24>;
+defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
+ uimm_exact8, uimm_exact16, uimm_exact32>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
+ Constraints = "$Rn = $wb, $Rt = $src",
+ DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
+ class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmTy, Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
+ (outs VList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt,
+ VList:$src, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn], $amt",
[],
- NoItinerary>;
- def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
- (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmTy, Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
+ (outs VList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
+ VList:$src, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn], $Rm",
[],
NoItinerary>;
- def sss : NeonI_Scalar3Same<u, 0b10, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
+}
+
+multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
+ Operand uimm_b, Operand uimm_h,
+ Operand uimm_s, Operand uimm_d> {
+ def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ uimm_b, neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ uimm_b, neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ uimm_h, neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ uimm_h, neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ uimm_s, neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ uimm_s, neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ uimm_d, neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+
+ def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ uimm_d, neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+}
+
+// Post-index load single 1-element structure to one lane of 1 register.
+defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
+ uimm_exact2, uimm_exact4, uimm_exact8>;
+
+// Post-index load single N-element structure to one lane of N consecutive
+// registers (N = 2,3,4)
+defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
+ uimm_exact4, uimm_exact8, uimm_exact16>;
+defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
+ uimm_exact6, uimm_exact12, uimm_exact24>;
+defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
+ uimm_exact8, uimm_exact16, uimm_exact32>;
+
+let mayStore = 1, neverHasSideEffects = 1,
+ hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
+ DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
+ class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmTy, Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt,
+ VList:$Rt, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmTy, Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
+ ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn], $Rm",
[],
NoItinerary>;
- def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
+}
+
+multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
+ Operand uimm_b, Operand uimm_h,
+ Operand uimm_s, Operand uimm_d> {
+ def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ uimm_b, neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _B_register : STN_WBReg_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ uimm_b, neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ uimm_h, neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _H_register : STN_WBReg_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ uimm_h, neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ uimm_s, neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _S_register : STN_WBReg_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ uimm_s, neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ uimm_d, neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+
+ def _D_register : STN_WBReg_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ uimm_d, neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
}
}
-class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
- (SUBREG_TO_REG (i64 0),
- (INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64),
- (EXTRACT_SUBREG VPR64:$Rm, sub_64)),
- sub_64)>;
+// Post-index store single 1-element structure from one lane of 1 register.
+defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
+ uimm_exact2, uimm_exact4, uimm_exact8>;
+
+// Post-index store single N-element structure from one lane of N consecutive
+// registers (N = 2,3,4)
+defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
+ uimm_exact4, uimm_exact8, uimm_exact16>;
+defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
+ uimm_exact6, uimm_exact12, uimm_exact24>;
+defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
+ uimm_exact8, uimm_exact16, uimm_exact32>;
+
+// End of post-index load/store single N-element instructions
+// (class SIMD lsone-post)
+
+// Neon Scalar instructions implementation
+// Scalar Three Same
+
+class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
+ RegisterClass FPRC>
+ : NeonI_Scalar3Same<u, size, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+
+class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
+
+multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
+ def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
+ }
+}
+
+multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
+ string asmop, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
+ def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
+ }
+}
+
+multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
+ string asmop, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
+ def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
+ def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
+ def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
+ }
+}
+
+multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD>
+ : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
+ def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
+ (INSTB FPR8:$Rn, FPR8:$Rm)>;
+
+ def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR16:$Rn, FPR16:$Rm)>;
+ def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
+
+class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+
+multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS> {
+ def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR16:$Rn, FPR16:$Rm)>;
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
+
+multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
+ Instruction INSTD>
+ : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+
+// Scalar Three Different
+
+class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS>
+ : NeonI_Scalar3Diff<u, size, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+
+multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
+ def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
+ def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
+}
+
+multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
+ let Constraints = "$Src = $Rd" in {
+ def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+ def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS> {
+ def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR16:$Rn, FPR16:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
+
+multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS> {
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
+}
+
+// Scalar Two Registers Miscellaneous
+
+class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS>
+ : NeonI_Scalar2SameMisc<u, size, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [],
+ NoItinerary>;
+
+multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
+ string asmop> {
+ def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
+ FPR32>;
+ def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
+ FPR64>;
+}
+
+multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
+ def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
+}
+
+multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
+ def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
+ def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
+ def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
+}
+
+class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;
+
+multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
+ def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
+ def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
+}
+
+class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
+ string asmop, RegisterClass FPRC>
+ : NeonI_Scalar2SameMisc<u, size, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [],
+ NoItinerary>;
+
+multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
+ string asmop> {
+
+ let Constraints = "$Src = $Rd" in {
+ def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
+ def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
+ def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
+ def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
+ }
+}
+
+class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1f32 (opnode (v1f64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+
+multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
+ SDPatternOperator Dopnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_Scalar2SameMisc<u, 0b11, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [],
+ NoItinerary>;
+
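+// The floating-point compare-against-zero forms below take an fpz32 zero
+// operand, matching the "compare ... to zero" variants of the instructions.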
+multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
+ !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
+ [],
+ NoItinerary>;
+ def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Rn, fpz32:$FPImm),
+ !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
+ [],
+ NoItinerary>;
+}
+
+class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
+ (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
+ (INSTD FPR64:$Rn, 0)>;
+
+class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
+ Instruction INSTD>
+ : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
+ (i32 neon_uimm0:$Imm), CC)),
+ (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
+
+multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
+ (v1f32 (scalar_to_vector (f32 fpz32:$FPImm))))),
+ (INSTS FPR32:$Rn, fpz32:$FPImm)>;
+ def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
+ (v1f32 (scalar_to_vector (f32 fpz32:$FPImm))))),
+ (INSTD FPR64:$Rn, fpz32:$FPImm)>;
+}
+
+multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD>
+ : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
+ def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
+ (INSTB FPR8:$Rn)>;
+ def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
+ (INSTH FPR16:$Rn)>;
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
+ (INSTH FPR16:$Rn)>;
+ def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
+ (INSTB FPR8:$Src, FPR8:$Rn)>;
+ def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
+ (INSTH FPR16:$Src, FPR16:$Rn)>;
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Src, FPR32:$Rn)>;
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Src, FPR64:$Rn)>;
+}
+
+// Scalar Shift By Immediate
+
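+// In the shift-by-immediate classes below, the element size is selected by the
+// position of the leading one in the immh field (Inst{22-19}); the remaining
+// immh:immb bits carry the encoded shift amount, and the shr_imm*/shl_imm*
+// operands restrict the accepted immediate range for each element size.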
+class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
+ RegisterClass FPRC, Operand ImmTy>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary>;
+
+multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
+ string asmop>
+ : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
+ def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
+ string asmop>
+ : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
+ def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ let Constraints = "$Src = $Rd";
+}
+
+class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ let Constraints = "$Src = $Rd";
+}
+
+class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS,
+ Operand ImmTy>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary>;
+
+multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
+ shr_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
+ shr_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
+ shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+class Neon_ScalarShiftImm_arm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
+ (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+
+multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD>
+ : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
+ def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
+ (INSTB FPR8:$Rn, imm:$Imm)>;
+ def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
+ (INSTH FPR16:$Rn, imm:$Imm)>;
+ def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+}
+
+class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
+ (i32 shl_imm64:$Imm))),
+ (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
+
+class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
+ (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
+
+multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
+ (INSTH FPR16:$Rn, imm:$Imm)>;
+ def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
+ SDPatternOperator Dopnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def ssi : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def ddi : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator Sopnode,
+ SDPatternOperator Dopnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def ssi : Pat<(v1i32 (Sopnode (v1f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def ddi : Pat<(v1i64 (Dopnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+// Scalar Signed Shift Right (Immediate)
+defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<sra, SSHRddi>;
+
+// Scalar Unsigned Shift Right (Immediate)
+defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<srl, USHRddi>;
+
+// Scalar Signed Rounding Shift Right (Immediate)
+defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
+
+// Scalar Unsigned Rounding Shift Right (Immediate)
+defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
+
+// Scalar Signed Shift Right and Accumulate (Immediate)
+def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vsrads_n, SSRA>;
+
+// Scalar Unsigned Shift Right and Accumulate (Immediate)
+def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vsradu_n, USRA>;
+
+// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vrsrads_n, SRSRA>;
+
+// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vrsradu_n, URSRA>;
+
+// Scalar Shift Left (Immediate)
+defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
+defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<shl, SHLddi>;
+
+// Signed Saturating Shift Left (Immediate)
+defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
+defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
+ SQSHLbbi, SQSHLhhi,
+ SQSHLssi, SQSHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
+
+// Unsigned Saturating Shift Left (Immediate)
+defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
+defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
+ UQSHLbbi, UQSHLhhi,
+ UQSHLssi, UQSHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
+
+// Signed Saturating Shift Left Unsigned (Immediate)
+defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
+defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
+ SQSHLUbbi, SQSHLUhhi,
+ SQSHLUssi, SQSHLUddi>;
+
+// Shift Right And Insert (Immediate)
+def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vsri, SRI>;
+
+// Shift Left And Insert (Immediate)
+def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
+def : Neon_ScalarShiftLImm_accum_D_size_patterns
+ <int_aarch64_neon_vsli, SLI>;
+
+// Signed Saturating Shift Right Narrow (Immediate)
+defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
+ SQSHRNbhi, SQSHRNhsi,
+ SQSHRNsdi>;
+
+// Unsigned Saturating Shift Right Narrow (Immediate)
+defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
+ UQSHRNbhi, UQSHRNhsi,
+ UQSHRNsdi>;
+
+// Signed Saturating Rounded Shift Right Narrow (Immediate)
+defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
+ SQRSHRNbhi, SQRSHRNhsi,
+ SQRSHRNsdi>;
+
+// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
+ UQRSHRNbhi, UQRSHRNhsi,
+ UQRSHRNsdi>;
+
+// Signed Saturating Shift Right Unsigned Narrow (Immediate)
+defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
+ SQSHRUNbhi, SQSHRUNhsi,
+ SQSHRUNsdi>;
+
+// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
+ SQRSHRUNbhi, SQRSHRUNhsi,
+ SQRSHRUNsdi>;
+
+// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
+defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
+defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
+ int_aarch64_neon_vcvtf64_n_s64,
+ SCVTF_Nssi, SCVTF_Nddi>;
+
+// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
+defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
+defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
+ int_aarch64_neon_vcvtf64_n_u64,
+ UCVTF_Nssi, UCVTF_Nddi>;
+
+// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
+defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
+defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_s32_f32,
+ int_aarch64_neon_vcvtd_n_s64_f64,
+ FCVTZS_Nssi, FCVTZS_Nddi>;
+
+// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
+defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
+defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_u32_f32,
+ int_aarch64_neon_vcvtd_n_u64_f64,
+ FCVTZU_Nssi, FCVTZU_Nddi>;
+
+// Patterns For Convert Instructions Between v1f64 and v1i64
+class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INST FPR64:$Rn, imm:$Imm)>;
+
+class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INST FPR64:$Rn, imm:$Imm)>;
+
+def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
+ SCVTF_Nddi>;
+
+def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
+ UCVTF_Nddi>;
+
+def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
+ FCVTZS_Nddi>;
+
+def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
+ FCVTZU_Nddi>;
// Scalar Integer Add
let isCommutable = 1 in {
@@ -1461,9 +4791,15 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
// Scalar Integer Sub
def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
-// Pattern for Scalar Integer Add and Sub with D register
-def : Neon_Scalar_D_size_patterns<add, ADDddd>;
-def : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+// Pattern for Scalar Integer Add and Sub with D register only
+defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
// Scalar Integer Saturating Add (Signed, Unsigned)
defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
@@ -1473,41 +4809,1212 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
-// Patterns for Scalar Integer Saturating Add, Sub with D register only
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
+ SQADDhhh, SQADDsss, SQADDddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
+ UQADDhhh, UQADDsss, UQADDddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
+ SQSUBhhh, SQSUBsss, SQSUBddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
+ UQSUBhhh, UQSUBsss, UQSUBddd>;
+
+// Scalar Integer Saturating Doubling Multiply Half High
+defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
+
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Doubling Multiply Half High and
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
+ SQDMULHsss>;
+defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
+ SQRDMULHsss>;
+
+// Scalar Floating-point Multiply Extended
+defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
+
+// Scalar Floating-point Reciprocal Step
+defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
+
+// Scalar Floating-point Reciprocal Square Root Step
+defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Floating-point Reciprocal Step and
+// Scalar Floating-point Reciprocal Square Root Step
+defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
+ FRECPSddd>;
+defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
+ FRSQRTSddd>;
+
+def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Floating-point Multiply Extended
+multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
+                                              FMULXsss, FMULXddd>;
// Scalar Integer Shift Left (Signed, Unsigned)
def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
+
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
-// Scalar Integer Rouding Shift Left (Signed, Unsigned)
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
+ SQSHLhhh, SQSHLsss, SQSHLddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
+ UQSHLhhh, UQSHLsss, UQSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
+
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
+
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
-// Patterns for Scalar Integer Shift Lef, Saturating Shift Left,
-// Rounding Shift Left, Rounding Saturating Shift Left with D register only
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
-def : Neon_Scalar_D_size_patterns<shl, SSHLddd>;
-def : Neon_Scalar_D_size_patterns<shl, USHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
+ SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
+ UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+
+// Signed Saturating Doubling Multiply-Add Long
+defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
+defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
+ SQDMLALshh, SQDMLALdss>;
+
+// Signed Saturating Doubling Multiply-Subtract Long
+defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
+defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
+ SQDMLSLshh, SQDMLSLdss>;
+
+// Signed Saturating Doubling Multiply Long
+defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
+defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
+ SQDMULLshh, SQDMULLdss>;
+
+// Scalar Signed Integer Convert To Floating-point
+defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
+defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
+ int_aarch64_neon_vcvtf64_s64,
+ SCVTFss, SCVTFdd>;
+
+// Scalar Unsigned Integer Convert To Floating-point
+defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
+defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
+ int_aarch64_neon_vcvtf64_u64,
+ UCVTFss, UCVTFdd>;
+
+// Scalar Floating-point Converts
+def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
+def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
+ FCVTXN>;
+
+defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
+ FCVTNSss, FCVTNSdd>;
+
+defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
+ FCVTNUss, FCVTNUdd>;
+
+defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
+ FCVTMSss, FCVTMSdd>;
+
+defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
+ FCVTMUss, FCVTMUdd>;
+
+defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
+ FCVTASss, FCVTASdd>;
+
+defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
+ FCVTAUss, FCVTAUdd>;
+
+defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
+ FCVTPSss, FCVTPSdd>;
+
+defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
+ FCVTPUss, FCVTPUdd>;
+
+defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
+ FCVTZSss, FCVTZSdd>;
+
+defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
+ FCVTZUss, FCVTZUdd>;
+
+// Patterns For Convert Instructions Between v1f64 and v1i64
+class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
+
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
+
+// Scalar Floating-point Reciprocal Estimate
+defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
+ FRECPEss, FRECPEdd>;
+
+// Scalar Floating-point Reciprocal Exponent
+defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
+ FRECPXss, FRECPXdd>;
+
+// Scalar Floating-point Reciprocal Square Root Estimate
+defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
+ FRSQRTEss, FRSQRTEdd>;
+
+// Scalar Floating-point Round
+class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
+def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
+def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
+def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
+def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
+def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
+def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
+
+// Scalar Integer Compare
+
+// Scalar Compare Bitwise Equal
+def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
+
+class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
+ Instruction INSTD,
+ CondCode CC>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
+
+// Scalar Compare Signed Greater Than Or Equal
+def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
+
+// Scalar Compare Unsigned Higher Or Same
+def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
+
+// Scalar Compare Unsigned Higher
+def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
+
+// Scalar Compare Signed Greater Than
+def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
+
+// Scalar Compare Bitwise Test Bits
+def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
+def : Neon_Scalar3Same_cmp_D_size_patterns<Neon_tst, CMTSTddd>;
+
+// Scalar Compare Bitwise Equal To Zero
+def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
+ CMEQddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;
+
+// Scalar Compare Signed Greater Than Or Equal To Zero
+def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
+ CMGEddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;
+
+// Scalar Compare Signed Greater Than Zero
+def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
+ CMGTddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;
+
+// Scalar Compare Signed Less Than Or Equal To Zero
+def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
+ CMLEddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;
+
+// Scalar Compare Less Than Zero
+def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
+ CMLTddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
+
+// Scalar Floating-point Compare
+
+// Scalar Floating-point Compare Mask Equal
+defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
+ FCMEQsss, FCMEQddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;
+
+// Scalar Floating-point Compare Mask Equal To Zero
+defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
+ FCMEQZssi, FCMEQZddi>;
+def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), SETEQ)),
+ (FCMEQZddi FPR64:$Rn, fpz32:$FPImm)>;
+
+// Scalar Floating-point Compare Mask Greater Than Or Equal
+defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
+ FCMGEsss, FCMGEddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;
+
+// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
+defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
+ FCMGEZssi, FCMGEZddi>;
+
+// Scalar Floating-point Compare Mask Greater Than
+defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
+ FCMGTsss, FCMGTddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;
+
+// Scalar Floating-point Compare Mask Greater Than Zero
+defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
+ FCMGTZssi, FCMGTZddi>;
+
+// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
+defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
+ FCMLEZssi, FCMLEZddi>;
+
+// Scalar Floating-point Compare Mask Less Than Zero
+defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
+ FCMLTZssi, FCMLTZddi>;
+
+// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
+defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
+ FACGEsss, FACGEddd>;
+
+// Scalar Floating-point Absolute Compare Mask Greater Than
+defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
+ FACGTsss, FACGTddd>;
+
+// Scalar Floating-point Absolute Difference
+defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
+defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd,
+ FABDsss, FABDddd>;
+
+// Scalar Absolute Value
+defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
+defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
+
+// Scalar Signed Saturating Absolute Value
+defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
+defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
+ SQABSbb, SQABShh, SQABSss, SQABSdd>;
+
+// Scalar Negate
+defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
+defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
+
+// Scalar Signed Saturating Negate
+defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
+defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
+ SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
+
+// Scalar Signed Saturating Accumulated of Unsigned Value
+defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
+defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
+ SUQADDbb, SUQADDhh,
+ SUQADDss, SUQADDdd>;
+
+// Scalar Unsigned Saturating Accumulated of Signed Value
+defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
+defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
+ USQADDbb, USQADDhh,
+ USQADDss, USQADDdd>;
+
+def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
+ (v1i64 FPR64:$Rn))),
+ (SUQADDdd FPR64:$Src, FPR64:$Rn)>;
+
+def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
+ (v1i64 FPR64:$Rn))),
+ (USQADDdd FPR64:$Src, FPR64:$Rn)>;
+
+def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
+ (ABSdd FPR64:$Rn)>;
+
+def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
+ (SQABSdd FPR64:$Rn)>;
+
+def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
+ (SQNEGdd FPR64:$Rn)>;
+
+def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
+ (v1i64 FPR64:$Rn))),
+ (NEGdd FPR64:$Rn)>;
+
+// Scalar Signed Saturating Extract Unsigned Narrow
+defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
+ SQXTUNbh, SQXTUNhs,
+ SQXTUNsd>;
+
+// Scalar Signed Saturating Extract Narrow
+defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
+ SQXTNbh, SQXTNhs,
+ SQXTNsd>;
+
+// Scalar Unsigned Saturating Extract Narrow
+defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
+ UQXTNbh, UQXTNhs,
+ UQXTNsd>;
+
+// Scalar Reduce Pairwise
+
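+// The pairwise reductions below consume a whole vector register (VPR64 or
+// VPR128) and produce a single scalar result in an FPR (e.g. the .2d form
+// reduces a 128-bit vector into a D register).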
+multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
+ string asmop, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
+ (outs FPR64:$Rd), (ins VPR128:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn.2d"),
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
+ string asmop, bit Commutable = 0>
+ : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
+ let isCommutable = Commutable in {
+ def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
+ (outs FPR32:$Rd), (ins VPR64:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn.2s"),
+ [],
+ NoItinerary>;
+ }
+}
+
+// Scalar Reduce Addition Pairwise (Integer) with
+// Pattern to match llvm.arm.* intrinsic
+defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
+
+// Pattern to match llvm.aarch64.* intrinsic for
+// Scalar Reduce Addition Pairwise (Integer)
+def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
+ (ADDPvv_D_2D VPR128:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
+ (ADDPvv_D_2D VPR128:$Rn)>;
+
+// Scalar Reduce Addition Pairwise (Floating Point)
+defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
+
+// Scalar Reduce Maximum Pairwise (Floating Point)
+defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
+
+// Scalar Reduce Minimum Pairwise (Floating Point)
+defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
+
+// Scalar Reduce maxNum Pairwise (Floating Point)
+defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
+
+// Scalar Reduce minNum Pairwise (Floating Point)
+defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
+
+multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
+ SDPatternOperator opnodeD,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
+ (INSTS VPR64:$Rn)>;
+ def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
+ (INSTD VPR128:$Rn)>;
+}
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
+ int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
+ int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
+ int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
+ int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
+ int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vaddv,
+ int_aarch64_neon_vaddv, FADDPvv_S_2S, FADDPvv_D_2D>;
+
+def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))),
+ (FADDPvv_S_2S (v2f32
+ (EXTRACT_SUBREG
+ (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
+ sub_64)))>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxv,
+ int_aarch64_neon_vmaxv, FMAXPvv_S_2S, FMAXPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminv,
+ int_aarch64_neon_vminv, FMINPvv_S_2S, FMINPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxnmv,
+ int_aarch64_neon_vmaxnmv, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminnmv,
+ int_aarch64_neon_vminnmv, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
+
+// Scalar by element Arithmetic
+
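+// The by-element forms encode the lane index in the h (Inst{11}), l (Inst{21})
+// and m (Inst{20}) bits; each definition below fills in only the bits its
+// element size needs and places the Rm register in the remaining low bits.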
+class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
+ string rmlane, bit u, bit szhi, bit szlo,
+ RegisterClass ResFPR, RegisterClass OpFPR,
+ RegisterOperand OpVPR, Operand OpImm>
+ : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
+ (outs ResFPR:$Rd),
+ (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<3> Imm;
+ bits<5> MRm;
+}
+
+class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
+ string rmlane,
+ bit u, bit szhi, bit szlo,
+ RegisterClass ResFPR,
+ RegisterClass OpFPR,
+ RegisterOperand OpVPR,
+ Operand OpImm>
+ : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
+ (outs ResFPR:$Rd),
+ (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ bits<3> Imm;
+ bits<5> MRm;
+}
+
+// Scalar Floating Point multiply (scalar, by element)
+def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
+ 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
+ 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point multiply extended (scalar, by element)
+def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
+ 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
+ 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
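+// Besides the 128-bit vector case, the patterns below also handle a 64-bit
+// vector operand by widening it with SUBREG_TO_REG before using the
+// 128-bit-indexed instruction, and they accept both operand orders.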
+multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
+ SDPatternOperator opnode,
+ Instruction INST,
+ ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
+ ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
+
+ def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
+ (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
+ (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
+ (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
+ (ResTy (INST (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+
+ // swapped operands
+ def : Pat<(ResTy (opnode
+ (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
+ (ResTy FPRC:$Rn))),
+ (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (opnode
+ (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
+ (ResTy FPRC:$Rn))),
+ (ResTy (INST (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Patterns for Scalar Floating Point multiply (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
+ f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
+ f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
+
+// Patterns for Scalar Floating Point multiply extended (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
+ FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
+ v2f32, v4f32, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
+ FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
+ v1f64, v2f64, neon_uimm0_bare>;
+
+// Scalar Floating Point fused multiply-add (scalar, by element)
+def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
+ 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
+ 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point fused multiply-subtract (scalar, by element)
+def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
+ 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
+ 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+// We are allowed to match the fma instruction regardless of compile options.
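+// The same multiclass also provides the fmls patterns by matching fma with a
+// negated (fneg) multiplicand, in either operand order.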
+multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
+ Instruction FMLAI, Instruction FMLSI,
+ ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
+ ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
+ // fmla
+ def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
+ (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLAI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
+ (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLAI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+
+ // swapped fmla operands
+ def : Pat<(ResTy (fma
+ (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
+ (ResTy FPRC:$Rn),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLAI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (fma
+ (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
+ (ResTy FPRC:$Rn),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLAI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+
+ // fmls
+ def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
+ (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLSI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
+ (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLSI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+
+ // swapped fmls operands
+ def : Pat<(ResTy (fma
+ (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
+ (ResTy FPRC:$Rn),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLSI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (fma
+ (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
+ (ResTy FPRC:$Rn),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLSI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Scalar Floating Point fused multiply-add and
+// multiply-subtract (scalar, by element)
+defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
+ f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
+ f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
+
+// Scalar Signed saturating doubling multiply long (scalar, by element)
+def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
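+// sqdmull takes its second operand as a lane extracted from a vector and
+// rebuilt with scalar_to_vector, so the patterns below match that shape in
+// either operand order and fold it into the by-element instruction.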
+multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
+ SDPatternOperator opnode,
+ Instruction INST,
+ ValueType ResTy, RegisterClass FPRC,
+ ValueType OpVTy, ValueType OpTy,
+ ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
+
+ def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
+ (OpVTy (scalar_to_vector
+ (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
+ (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
+
+ //swapped operands
+ def : Pat<(ResTy (opnode
+ (OpVTy (scalar_to_vector
+ (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
+ (OpVTy FPRC:$Rn))),
+ (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
+}
+
+// Patterns for Scalar Signed saturating doubling
+// multiply long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
+ SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
+ i32, VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
+ SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
+ i32, VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
+ SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
+ i32, VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
+ SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
+ i32, VPR128Lo, neon_uimm2_bare>;
+
+// Scalar Signed saturating doubling multiply-add long (scalar, by element)
+def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling
+// multiply-subtract long (scalar, by element)
+def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
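+// The multiply-accumulate-long patterns match a saturating add/subtract
+// (opnode) wrapped around the core doubling multiply (coreopnode) and fold
+// the pair into a single by-element sqdmlal/sqdmlsl instruction.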
+multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
+ SDPatternOperator opnode,
+ SDPatternOperator coreopnode,
+ Instruction INST,
+ ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
+ ValueType OpTy,
+ ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
+
+ def : Pat<(ResTy (opnode
+ (ResTy ResFPRC:$Ra),
+ (ResTy (coreopnode (OpTy FPRC:$Rn),
+ (OpTy (scalar_to_vector
+ (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
+ (ResTy (INST (ResTy ResFPRC:$Ra),
+ (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
+
+ // swapped operands
+ def : Pat<(ResTy (opnode
+ (ResTy ResFPRC:$Ra),
+ (ResTy (coreopnode
+ (OpTy (scalar_to_vector
+ (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
+ (OpTy FPRC:$Rn))))),
+ (ResTy (INST (ResTy ResFPRC:$Ra),
+ (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
+}
+
+// Patterns for Scalar Signed saturating
+// doubling multiply-add long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
+ int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
+ i32, VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
+ int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
+ i32, VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
+ int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
+ i32, VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
+ int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
+ i32, VPR128Lo, neon_uimm2_bare>;
+
+// Patterns for Scalar Signed saturating
+// doubling multiply-sub long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
+ int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
+ i32, VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
+ int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
+ i32, VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
+ int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
+ i32, VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
+ int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
+ i32, VPR128Lo, neon_uimm2_bare>;
+
+// Scalar general arithmetic operation
+class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (INST FPR64:$Rn, FPR64:$Rm)>;
+
+class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
+ (v1f64 FPR64:$Ra))),
+ (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+
+def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
+
+def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
+def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
+
+def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
+def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
+
+// Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Patterns for Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
+ SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
+ i32, VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
+ SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
+ i32, VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
+ SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
+ i32, VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
+ SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
+ i32, VPR128Lo, neon_uimm2_bare>;
+
+// Scalar Signed saturating rounding doubling multiply
+// returning high half (scalar, by element)
+def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
+ SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
+ VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
+ SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
+ VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
+ SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
+ VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
+ SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
+ VPR128Lo, neon_uimm2_bare>;
+
+// Scalar Copy - DUP element to scalar
+class NeonI_Scalar_DUP<string asmop, string asmlane,
+ RegisterClass ResRC, RegisterOperand VPRC,
+ Operand OpImm>
+ : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
+ ValueType OpTy, Operand OpImm,
+ ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
+ def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
+ (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Patterns for vector extract of FP data using scalar DUP instructions
+defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
+ v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
+ v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
+
+multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
+ ValueType ResTy, ValueType OpTy,Operand OpLImm,
+ ValueType NOpTy, ValueType ExTy, Operand OpNImm> {
+
+ def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
+ (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;
+
+ def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+
+multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
+ ValueType OpTy, ValueType ElemTy,
+ Operand OpImm, ValueType OpNTy,
+ ValueType ExTy, Operand OpNImm> {
+
+ def : Pat<(ResTy (vector_insert (ResTy undef),
+ (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
+ (neon_uimm0_bare:$Imm))),
+ (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (vector_insert (ResTy undef),
+ (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
+ (OpNImm:$Imm))),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
+ ValueType OpTy, ValueType ElemTy,
+ Operand OpImm, ValueType OpNTy,
+ ValueType ExTy, Operand OpNImm> {
+
+ def : Pat<(ResTy (scalar_to_vector
+ (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
+ (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (scalar_to_vector
+ (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
+// instructions.
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
+ v1i64, v2i64, i64, neon_uimm1_bare,
+ v1i64, v2i64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
+ v1i32, v4i32, i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
+ v1i16, v8i16, i32, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
+ v1i8, v16i8, i32, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
+ v1f64, v2f64, f64, neon_uimm1_bare,
+ v1f64, v2f64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
+ v1f32, v4f32, f32, neon_uimm2_bare,
+ v2f32, v4f32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
+ v1i64, v2i64, i64, neon_uimm1_bare,
+ v1i64, v2i64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
+ v1i32, v4i32, i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
+ v1i16, v8i16, i32, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
+ v1i8, v16i8, i32, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
+ v1f64, v2f64, f64, neon_uimm1_bare,
+ v1f64, v2f64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
+ v1f32, v4f32, f32, neon_uimm2_bare,
+ v2f32, v4f32, neon_uimm1_bare>;
+
+multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
+ Instruction DUPI, Operand OpImm,
+ RegisterClass ResRC> {
+ def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
+ (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
+}
+
+// Aliases for Scalar copy - DUP element (scalar)
+// FIXME: This is actually the preferred syntax but TableGen can't deal with
+// custom printing of aliases.
+defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
+defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
+defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
+defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
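+// For example, "mov b0, v2.b[7]" should be accepted and assemble to the same
+// encoding as "dup b0, v2.b[7]"; the trailing 0b0 on the alias keeps the
+// printer emitting the "dup" spelling for now.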
+
+multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
+ ValueType OpTy> {
+ def : Pat<(ResTy (GetLow VPR128:$Rn)),
+ (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
+ def : Pat<(ResTy (GetHigh VPR128:$Rn)),
+ (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
+}
+
+defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
+defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
+defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
+defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
+defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
+defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1578,57 +6085,2587 @@ def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
-
// ...and scalar bitcasts...
+def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
+def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
+def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
+
+def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;
+
+def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
+
+def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
+
+def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
+
+def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
+
+def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
+def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
+def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+
+def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
+
+def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+// Scalar Three Same
+
+def neon_uimm3 : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 8;}]> {
+ let ParserMatchClass = uimm3_asmoperand;
+ let PrintMethod = "printUImmHexOperand";
+}
+
+def neon_uimm4 : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 16;}]> {
+ let ParserMatchClass = uimm4_asmoperand;
+ let PrintMethod = "printUImmHexOperand";
+}
+
+// Bitwise Extract
+class NeonI_Extract<bit q, bits<2> op2, string asmop,
+ string OpS, RegisterOperand OpVPR, Operand OpImm>
+ : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
+ (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
+ asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
+ ", $Rm." # OpS # ", $Index",
+ [],
+ NoItinerary>{
+ bits<4> Index;
+}
+
+def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
+ VPR64, neon_uimm3> {
+ let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
+}
+
+def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
+ VPR128, neon_uimm4> {
+ let Inst{14-11} = Index;
+}
+
+class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
+ Operand OpImm>
+ : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
+ (i64 OpImm:$Imm))),
+ (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
+
+def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
+
+// Table lookup
+class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
+ string asmop, string OpS, RegisterOperand OpVPR,
+ RegisterOperand VecList>
+ : NeonI_TBL<q, op2, len, op,
+ (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
+ [],
+ NoItinerary>;
+
+// The vectors in the lookup table are always 16b
+multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
+ def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+
+ def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+}
+
+defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
+defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
+defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
+defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
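+// For example, "tbl v0.8b, { v1.16b, v2.16b }, v3.8b" (TBL2_8b) uses each
+// byte of v3 as an index into the 32-byte table formed by v1:v2; indices
+// beyond the end of the table produce a zero result byte.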
+
+// Table lookup extension
+class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
+ string asmop, string OpS, RegisterOperand OpVPR,
+ RegisterOperand VecList>
+ : NeonI_TBL<q, op2, len, op,
+ (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
+ [],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// The vectors in the lookup table are always 16b
+multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
+ def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+
+ def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+}
+
+defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
+defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
+defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
+defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
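+// TBX differs from TBL only in that out-of-range indices leave the
+// corresponding destination byte unchanged, which is why $src is tied to
+// $Rd above.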
+
+class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
+ RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
+ : NeonI_copy<0b1, 0b0, 0b0011,
+ (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd." # Res # "[$Imm], $Rn",
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (OpTy OpGPR:$Rn),
+ (OpImm:$Imm))))],
+ NoItinerary> {
+ bits<4> Imm;
+ let Constraints = "$src = $Rd";
+}
+
+// Insert element (vector, from main)
+def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
+ neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
+ neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
+ neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
+ neon_uimm1_bare> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
+ (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
+def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
+ (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
+def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
+ (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
+def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
+ (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
+
+class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
+ RegisterClass OpGPR, ValueType OpTy,
+ Operand OpImm, Instruction INS>
+ : Pat<(ResTy (vector_insert
+ (ResTy VPR64:$src),
+ (OpTy OpGPR:$Rn),
+ (OpImm:$Imm))),
+ (ResTy (EXTRACT_SUBREG
+ (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+ OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
+
+def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
+ neon_uimm3_bare, INSbw>;
+def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
+ neon_uimm2_bare, INShw>;
+def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
+ neon_uimm1_bare, INSsw>;
+def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
+ neon_uimm0_bare, INSdx>;
+
+class NeonI_INS_element<string asmop, string Res, Operand ResImm>
+ : NeonI_insert<0b1, 0b1,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
+ ResImm:$Immd, ResImm:$Immn),
+ asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
+ [],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ bits<4> Immd;
+ bits<4> Immn;
+}
+
+// Insert element (vector, from element)
+def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
+ let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
+ let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
+}
+def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
+ let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
+ let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
+ // bit 11 is unspecified, but should be set to zero.
+}
+def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
+ let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
+ let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
+ // bits 11-12 are unspecified, but should be set to zero.
+}
+def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
+ let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
+ let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
+ // bits 11-13 are unspecified, but should be set to zero.
+}
+
+def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
+ (INSELb VPR128:$Rd, VPR128:$Rn,
+ neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
+def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
+ (INSELh VPR128:$Rd, VPR128:$Rn,
+ neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
+def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
+ (INSELs VPR128:$Rd, VPR128:$Rn,
+ neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
+def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
+ (INSELd VPR128:$Rd, VPR128:$Rn,
+ neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
+
+multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
+ ValueType MidTy, Operand StImm, Operand NaImm,
+ Instruction INS> {
+def : Pat<(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy (vector_extract
+ (ResTy VPR128:$Rn),
+ (StImm:$Immn))),
+ (StImm:$Immd))),
+ (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
+ StImm:$Immd, StImm:$Immn)>;
+
+def : Pat <(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy (vector_extract
+ (NaTy VPR64:$Rn),
+ (NaImm:$Immn))),
+ (StImm:$Immd))),
+ (INS (ResTy VPR128:$src),
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
+ StImm:$Immd, NaImm:$Immn)>;
+
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy (vector_extract
+ (ResTy VPR128:$Rn),
+ (StImm:$Immn))),
+ (NaImm:$Immd))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy VPR128:$Rn),
+ NaImm:$Immd, StImm:$Immn)),
+ sub_64))>;
+
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy (vector_extract
+ (NaTy VPR64:$Rn),
+ (NaImm:$Immn))),
+ (NaImm:$Immd))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
+ NaImm:$Immd, NaImm:$Immn)),
+ sub_64))>;
+}
+
+defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
+ neon_uimm1_bare, INSELs>;
+defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
+ neon_uimm0_bare, INSELd>;
+defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
+ neon_uimm3_bare, INSELb>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
+ neon_uimm2_bare, INSELh>;
+defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, INSELs>;
+defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
+ neon_uimm0_bare, INSELd>;
+
+multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
+ ValueType MidTy,
+ RegisterClass OpFPR, Operand ResImm,
+ SubRegIndex SubIndex, Instruction INS> {
+def : Pat <(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy OpFPR:$Rn),
+ (ResImm:$Imm))),
+ (INS (ResTy VPR128:$src),
+ (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
+ ResImm:$Imm,
+ (i64 0))>;
+
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy OpFPR:$Rn),
+ (ResImm:$Imm))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
+ ResImm:$Imm,
+ (i64 0))),
+ sub_64))>;
+}
+
+defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
+ sub_32, INSELs>;
+defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
+ sub_64, INSELd>;
+
+class NeonI_SMOV<string asmop, string Res, bit Q,
+ ValueType OpTy, ValueType eleTy,
+ Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
+ : NeonI_copy<Q, 0b0, 0b0101,
+ (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
+ [(set (ResTy ResGPR:$Rd),
+ (ResTy (sext_inreg
+ (ResTy (vector_extract
+ (OpTy VPR128:$Rn), (OpImm:$Imm))),
+ eleTy)))],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+// Signed integer move (main, from element)
+def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
+ GPR64, i64> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
+ GPR64, i64> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
+ GPR64, i64> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+
+multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
+ ValueType eleTy, Operand StImm, Operand NaImm,
+ Instruction SMOVI> {
+ def : Pat<(i64 (sext_inreg
+ (i64 (anyext
+ (i32 (vector_extract
+ (StTy VPR128:$Rn), (StImm:$Imm))))),
+ eleTy)),
+ (SMOVI VPR128:$Rn, StImm:$Imm)>;
+
+ def : Pat<(i64 (sext
+ (i32 (vector_extract
+ (StTy VPR128:$Rn), (StImm:$Imm))))),
+ (SMOVI VPR128:$Rn, StImm:$Imm)>;
+
+ def : Pat<(i64 (sext_inreg
+ (i64 (vector_extract
+ (NaTy VPR64:$Rn), (NaImm:$Imm))),
+ eleTy)),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+
+ def : Pat<(i64 (sext_inreg
+ (i64 (anyext
+ (i32 (vector_extract
+ (NaTy VPR64:$Rn), (NaImm:$Imm))))),
+ eleTy)),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+
+ def : Pat<(i64 (sext
+ (i32 (vector_extract
+ (NaTy VPR64:$Rn), (NaImm:$Imm))))),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+}
+
+defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
+ neon_uimm3_bare, SMOVxb>;
+defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
+ neon_uimm2_bare, SMOVxh>;
+defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, SMOVxs>;
+
+class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
+ ValueType eleTy, Operand StImm, Operand NaImm,
+ Instruction SMOVI>
+ : Pat<(i32 (sext_inreg
+ (i32 (vector_extract
+ (NaTy VPR64:$Rn), (NaImm:$Imm))),
+ eleTy)),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+
+def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
+ neon_uimm3_bare, SMOVwb>;
+def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
+ neon_uimm2_bare, SMOVwh>;
+
+class NeonI_UMOV<string asmop, string Res, bit Q,
+ ValueType OpTy, Operand OpImm,
+ RegisterClass ResGPR, ValueType ResTy>
+ : NeonI_copy<Q, 0b0, 0b0111,
+ (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
+ [(set (ResTy ResGPR:$Rd),
+ (ResTy (vector_extract
+ (OpTy VPR128:$Rn), (OpImm:$Imm))))],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+// Unsigned integer move (main, from element)
+def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
+ GPR64, i64> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
+ (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
+def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
+ (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
+
+class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
+ Operand StImm, Operand NaImm,
+ Instruction SMOVI>
+ : Pat<(ResTy (vector_extract
+ (NaTy VPR64:$Rn), NaImm:$Imm)),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+
+def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
+ neon_uimm3_bare, UMOVwb>;
+def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
+ neon_uimm2_bare, UMOVwh>;
+def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, UMOVws>;
+
+def : Pat<(i32 (and
+ (i32 (vector_extract
+ (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
+ 255)),
+ (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
+
+def : Pat<(i32 (and
+ (i32 (vector_extract
+ (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
+ 65535)),
+ (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
+
+def : Pat<(i64 (zext
+ (i32 (vector_extract
+ (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
+ (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
+
+def : Pat<(i32 (and
+ (i32 (vector_extract
+ (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
+ 255)),
+ (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+ neon_uimm3_bare:$Imm)>;
+
+def : Pat<(i32 (and
+ (i32 (vector_extract
+ (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
+ 65535)),
+ (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+ neon_uimm2_bare:$Imm)>;
+
+def : Pat<(i64 (zext
+ (i32 (vector_extract
+ (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
+ (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+ neon_uimm0_bare:$Imm)>;
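+// A umov of a B or H lane already zero-extends the element into the W
+// register, so the explicit "and" masks above fold away, and the zext of a
+// D-lane extract is just the plain umov into an X register.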
+
+// Additional copy patterns for scalar types
+def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
+ (UMOVwb (v16i8
+ (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
+
+def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
+ (UMOVwh (v8i16
+ (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
+
+def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
+ (FMOVws FPR32:$Rn)>;
+
+def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
+ (FMOVxd FPR64:$Rn)>;
+
+def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
+ (f64 FPR64:$Rn)>;
+
+def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
+ (f32 FPR32:$Rn)>;
+
+def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
+ (v1i8 (EXTRACT_SUBREG (v16i8
+ (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
+ sub_8))>;
+
+def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
+ (v1i16 (EXTRACT_SUBREG (v8i16
+ (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
+ sub_16))>;
+
+def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
+ (FMOVsw $src)>;
+
+def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
+ (FMOVdx $src)>;
+
+def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
+ (v1f32 FPR32:$Rn)>;
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
+ (v1f64 FPR64:$Rn)>;
+
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
+ (FMOVdd $src)>;
+
+def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
+ (f64 FPR64:$src), sub_64)>;
+
+class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
+ RegisterOperand ResVPR, Operand OpImm>
+ : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
+ (ins VPR128:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
+ neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+
+def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
+ neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+
+def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
+ neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+
+def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
+ neon_uimm1_bare> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
+ neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+
+def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
+ neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+
+def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
+ neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+
+multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
+ ValueType OpTy,ValueType NaTy,
+ ValueType ExTy, Operand OpLImm,
+ Operand OpNImm> {
+def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
+ (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
+
+def : Pat<(ResTy (Neon_vduplane
+ (NaTy VPR64:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPELT
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
+}
+defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
+ neon_uimm4_bare, neon_uimm3_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
+ neon_uimm4_bare, neon_uimm3_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
+ neon_uimm3_bare, neon_uimm2_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
+ neon_uimm3_bare, neon_uimm2_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
+ neon_uimm1_bare, neon_uimm0_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
+ neon_uimm1_bare, neon_uimm0_bare>;
+
+def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
+ (v2f32 (DUPELT2s
+ (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+ (i64 0)))>;
+def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
+ (v4f32 (DUPELT4s
+ (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+ (i64 0)))>;
+def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
+ (v2f64 (DUPELT2d
+ (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
+ (i64 0)))>;
+
+class NeonI_DUP<bit Q, string asmop, string rdlane,
+ RegisterOperand ResVPR, ValueType ResTy,
+ RegisterClass OpGPR, ValueType OpTy>
+ : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
+ asmop # "\t$Rd" # rdlane # ", $Rn",
+ [(set (ResTy ResVPR:$Rd),
+ (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
+ NoItinerary>;
+
+def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
+ let Inst{20-16} = 0b00001;
+ // bits 17-20 are unspecified, but should be set to zero.
+}
+
+def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
+ let Inst{20-16} = 0b00010;
+ // bits 18-20 are unspecified, but should be set to zero.
+}
+
+def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
+ let Inst{20-16} = 0b00100;
+ // bits 19-20 are unspecified, but should be set to zero.
+}
+
+def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
+ let Inst{20-16} = 0b01000;
+ // bit 20 is unspecified, but should be set to zero.
+}
+
+def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
+ let Inst{20-16} = 0b00001;
+ // bits 17-20 are unspecified, but should be set to zero.
+}
+
+def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
+ let Inst{20-16} = 0b00010;
+ // bits 18-20 are unspecified, but should be set to zero.
+}
+
+def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
+ let Inst{20-16} = 0b00100;
+ // bits 19-20 are unspecified, but should be set to zero.
+}
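+// For example, "dup v0.4s, w1" (DUP4s) replicates the value of w1 into all
+// four 32-bit lanes of v0.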
+
+// Patterns for CONCAT_VECTORS
+multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
+ (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
+ (INSELd
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
+ (i64 1),
+ (i64 0))>;
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
+ (DUPELT2d
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (i64 0))> ;
+}
+
+defm : Concat_Vector_Pattern<v16i8, v8i8>;
+defm : Concat_Vector_Pattern<v8i16, v4i16>;
+defm : Concat_Vector_Pattern<v4i32, v2i32>;
+defm : Concat_Vector_Pattern<v2i64, v1i64>;
+defm : Concat_Vector_Pattern<v4f32, v2f32>;
+defm : Concat_Vector_Pattern<v2f64, v1f64>;
+
+// Patterns for EXTRACT_SUBVECTOR
+def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
+ (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
+ (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
+ (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
+ (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
+ (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
+ (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
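+// Taking the low half of a 128-bit vector needs no instruction; it is
+// modelled as a plain sub_64 subregister extraction.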
+
+// The following definitions are for the instruction class (3V Elem)
+
+// Variant 1
+
+class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS, string EleOpS,
+ Operand OpImm, RegisterOperand ResVPR,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR>
+ : NeonI_2VElem<q, u, size, opcode,
+ (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
+ EleOpVPR:$Re, OpImm:$Index),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
+ ", $Re." # EleOpS # "[$Index]",
+ [],
+ NoItinerary> {
+ bits<3> Index;
+ bits<5> Re;
+
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
+ // vector register class for element is always 128-bit to cover the max index
+ def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+ neon_uimm2_bare, VPR64, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // Index operations on 16-bit (H) elements are restricted to using v0-v15:
+ // the M bit (Inst{20}) is taken by the index, leaving only Re{3-0} to
+ // encode the element register.
+ def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
+ neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+
+ def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
+ neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+}
+
+defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
+defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
+
+// Pattern for lane in 128-bit vector
+class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
+ ValueType EleOpTy>
+ : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
+ (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
+ ValueType EleOpTy>
+ : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
+ (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST ResVPR:$src, OpVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
+
+multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
+{
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
+ op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
+ op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
+ op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
+ op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
+
+ // Index can only be half of the max value for lane in 64-bit vector
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
+ op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
+ op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
+}
+
+defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
+defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
+
+class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS, string EleOpS,
+ Operand OpImm, RegisterOperand ResVPR,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR>
+ : NeonI_2VElem<q, u, size, opcode,
+ (outs ResVPR:$Rd), (ins OpVPR:$Rn,
+ EleOpVPR:$Re, OpImm:$Index),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
+ ", $Re." # EleOpS # "[$Index]",
+ [],
+ NoItinerary> {
+ bits<3> Index;
+ bits<5> Re;
+}
+
+multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
+ // vector register class for element is always 128-bit to cover the max index
+ def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+ neon_uimm2_bare, VPR64, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // Index operations on 16-bit(H) elements are restricted to using v0-v15.
+ def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
+ neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+
+ def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
+ neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+}
+
+defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
+defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
+defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
+
+// Pattern for lane in 128-bit vector
+class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
+ : Pat<(ResTy (op (OpTy OpVPR:$Rn),
+ (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
+ : Pat<(ResTy (op (OpTy OpVPR:$Rn),
+ (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST OpVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
+
+multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
+ op, VPR64, VPR128, v2i32, v2i32, v4i32>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
+ op, VPR128, VPR128, v4i32, v4i32, v4i32>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
+ op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
+ op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
+
+ // Index can only be half of the max value for lane in 64-bit vector
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
+ op, VPR64, VPR64, v2i32, v2i32, v2i32>;
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
+ op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
+}
+
+defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
+defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
+defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
+
+// Variant 2
+
+multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
+ // vector register class for element is always 128-bit to cover the max index
+ def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+ neon_uimm2_bare, VPR64, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // _1d2d doesn't exist!
+
+ def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
+ neon_uimm1_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{0}};
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Re;
+ }
+}
+
+defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
+defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
+
+class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
+ SDPatternOperator coreop>
+ : Pat<(ResTy (op (OpTy OpVPR:$Rn),
+ (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
+ (INST OpVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
+
+multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
+ op, VPR64, VPR128, v2f32, v2f32, v4f32>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
+ op, VPR128, VPR128, v4f32, v4f32, v4f32>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
+ op, VPR128, VPR128, v2f64, v2f64, v2f64>;
+
+ // Index can only be half of the max value for lane in 64-bit vector
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
+ op, VPR64, VPR64, v2f32, v2f32, v2f32>;
+
+ def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
+ op, VPR128, VPR64, v2f64, v2f64, v1f64,
+ BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
+}
+
+defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
+defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
+
+def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
+ (v2f32 VPR64:$Rn))),
+ (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
+ (v4f32 VPR128:$Rn))),
+ (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
+ (v2f64 VPR128:$Rn))),
+ (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
+
+// The following patterns use fma; -ffp-contract=fast generates fma.
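+// For example, with -ffp-contract=fast a source expression like a + b*c[2]
+// can reach instruction selection as an ISD::FMA whose multiplicand is a
+// duplicated lane, and the patterns below turn it into fmla (by element).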
+
+multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
+ // vector register class for element is always 128-bit to cover the max index
+ def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+ neon_uimm2_bare, VPR64, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // _1d2d doesn't exist!
+
+ def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
+ neon_uimm1_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{0}};
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Re;
+ }
+}
+
+defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
+defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
+
+// Pattern for lane in 128-bit vector
+class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy,
+ SDPatternOperator coreop>
+ : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
+ (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
+ (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane 0
+class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
+ RegisterOperand ResVPR, ValueType ResTy>
+ : Pat<(ResTy (op (ResTy ResVPR:$Rn),
+ (ResTy (Neon_vdup (f32 FPR32:$Re))),
+ (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn,
+ (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy,
+ SDPatternOperator coreop>
+ : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
+ (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
+ SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy,
+ SDPatternOperator coreop>
+ : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
+ (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
+
+
+multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
+ BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
+
+ def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
+ op, VPR64, v2f32>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
+ BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
+
+ def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
+ op, VPR128, v4f32>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
+ BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
+
+ // Index can only be half of the max value for lane in 64-bit vector
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
+ BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
+
+ def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
+ BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
+}
+
+defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
+
+// Pattern for lane 0
+class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
+ RegisterOperand ResVPR, ValueType ResTy>
+ : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
+ (ResTy (Neon_vdup (f32 FPR32:$Re))),
+ (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn,
+ (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
+{
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
+ BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
+ BinOpFrag<(Neon_vduplane
+ (fneg node:$LHS), node:$RHS)>>;
+
+ def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
+ op, VPR64, v2f32>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
+ BinOpFrag<(fneg (Neon_vduplane
+ node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
+ BinOpFrag<(Neon_vduplane
+ (fneg node:$LHS), node:$RHS)>>;
+
+ def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
+ op, VPR128, v4f32>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
+ BinOpFrag<(fneg (Neon_vduplane
+ node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
+ BinOpFrag<(Neon_vduplane
+ (fneg node:$LHS), node:$RHS)>>;
+
+ // Index can only be half of the max value for lane in 64-bit vector
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
+ BinOpFrag<(fneg (Neon_vduplane
+ node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
+ BinOpFrag<(Neon_vduplane
+ (fneg node:$LHS), node:$RHS)>>;
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
+ BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
+ BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;
+
+ def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
+ BinOpFrag<(fneg (Neon_combine_2d
+ node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
+ BinOpFrag<(Neon_combine_2d
+ (fneg node:$LHS), (fneg node:$RHS))>>;
+}
+
+defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
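+// fmls by element is matched as fma with one multiplicand negated; the fneg
+// may sit either on the duplicated lane or on the element before it is
+// duplicated, so the multiclass above lists a pattern for each form.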
+
+// Variant 3: Long type
+// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
+// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
+
+multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
+ // vector register class for element is always 128-bit to cover the max index
+ def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
+ neon_uimm2_bare, VPR128, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // Index operations on 16-bit(H) elements are restricted to using v0-v15.
+ def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
+ neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+
+ def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
+ neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+}
+
+defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
+defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
+defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
+defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
+defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
+defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
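+
+// For illustration, these instantiations correspond to by-element assembly
+// forms such as smlal v0.4s, v1.4h, v3.h[2] and smlal2 v0.2d, v1.4s, v2.s[3],
+// with the H-element register limited to v0-v15 as noted above.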
+
+multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
+ // vector register class for element is always 128-bit to cover the max index
+ def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
+ neon_uimm2_bare, VPR128, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // Index operations on 16-bit(H) elements are restricted to using v0-v15.
+ def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
+ neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+
+ def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
+ neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+}
+
+defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
+defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
+defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
+
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
+ (FMOVdd $src)>;
+def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))),
+ (FMOVss $src)>;
+
+// Pattern for lane in 128-bit vector
+class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand EleOpVPR, ValueType ResTy,
+ ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop>
+ : Pat<(ResTy (op (ResTy VPR128:$src),
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vduplane
+ (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand EleOpVPR, ValueType ResTy,
+ ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop>
+ : Pat<(ResTy (op (ResTy VPR128:$src),
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vduplane
+ (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST VPR128:$src, VPR128:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
+
+class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
+ ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop, Instruction DupInst>
+ : Pat<(ResTy (op (ResTy VPR128:$src),
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
+ (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
+
+multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
+ op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
+ op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;
+
+ def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
+ op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
+ op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
+
+ def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
+ op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
+
+ def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
+ op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
+
+ // Index can only be half of the max value for lane in 64-bit vector
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
+ op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
+ op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;
+
+ def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
+ op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
+ op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
+}
+
+defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
+defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
+defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
+defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
+
+// Pattern for lane in 128-bit vector
+class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand EleOpVPR, ValueType ResTy,
+ ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop>
+ : Pat<(ResTy (op
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vduplane
+ (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand EleOpVPR, ValueType ResTy,
+ ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop>
+ : Pat<(ResTy (op
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vduplane
+ (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST VPR128:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
+
+// Pattern for fixed lane 0
+class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
+ ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop, Instruction DupInst>
+ : Pat<(ResTy (op
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
+ (INST VPR128:$Rn, (DupInst $Re), 0)>;
+
+multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
+ op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
+ op, VPR64, VPR128, v2i64, v2i32, v4i32>;
+
+ def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
+ op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
+ op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
+
+ def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
+ op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
+
+ def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
+ op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
+
+ // Index can only be half of the max value for lane in 64-bit vector
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
+ op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
+ op, VPR64, VPR64, v2i64, v2i32, v2i32>;
+
+ def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
+ op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
+ op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
+}
+
+defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
+defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
+defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
+
+multiclass NI_qdma<SDPatternOperator op> {
+ def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
+ (op node:$Ra,
+ (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
+
+ def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
+ (op node:$Ra,
+ (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
+}
+
+defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
+defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
+
+multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
+ !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
+ v4i32, v4i16, v8i16>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
+ !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
+ v2i64, v2i32, v4i32>;
+
+ def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
+ !cast<PatFrag>(op # "_4s"), VPR128Lo,
+ v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
+ !cast<PatFrag>(op # "_2d"), VPR128,
+ v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
+
+ def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
+ !cast<PatFrag>(op # "_4s"),
+ v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
+
+ def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
+ !cast<PatFrag>(op # "_2d"),
+ v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
+
+ // Index can only be half of the max value for lane in 64-bit vector
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
+ !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
+ v4i32, v4i16, v4i16>;
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
+ !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
+ v2i64, v2i32, v2i32>;
+
+ def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
+ !cast<PatFrag>(op # "_4s"), VPR64Lo,
+ v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
+ !cast<PatFrag>(op # "_2d"), VPR64,
+ v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
+}
+
+defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
+defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
+
+// End of implementation for instruction class (3V Elem)
+
+class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
+ bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
+ SDPatternOperator Neon_Rev>
+ : NeonI_2VMisc<Q, U, size, opcode,
+ (outs ResVPR:$Rd), (ins ResVPR:$Rn),
+ asmop # "\t$Rd." # Res # ", $Rn." # Res,
+ [(set (ResTy ResVPR:$Rd),
+ (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
+ NoItinerary> ;
+
+def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
+ v16i8, Neon_rev64>;
+def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
+ v8i16, Neon_rev64>;
+def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
+ v4i32, Neon_rev64>;
+def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
+ v8i8, Neon_rev64>;
+def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
+ v4i16, Neon_rev64>;
+def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
+ v2i32, Neon_rev64>;
+
+def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
+def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
+
+def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
+ v16i8, Neon_rev32>;
+def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
+ v8i16, Neon_rev32>;
+def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
+ v8i8, Neon_rev32>;
+def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
+ v4i16, Neon_rev32>;
+
+def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
+ v16i8, Neon_rev16>;
+def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
+ v8i8, Neon_rev16>;
+
+multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
+ SDPatternOperator Neon_Padd> {
+ def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.16b",
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.8b",
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.8h",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.4h",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.4s",
+ [(set (v2i64 VPR128:$Rd),
+ (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.1d, $Rn.2s",
+ [(set (v1i64 VPR64:$Rd),
+ (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
+ int_arm_neon_vpaddls>;
+defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
+ int_arm_neon_vpaddlu>;
+
+multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
+ SDPatternOperator Neon_Padd> {
+ let Constraints = "$src = $Rd" in {
+ def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.16b",
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (Neon_Padd
+ (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.8b",
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (Neon_Padd
+ (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.8h",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_Padd
+ (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.4h",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_Padd
+ (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.4s",
+ [(set (v2i64 VPR128:$Rd),
+ (v2i64 (Neon_Padd
+ (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.1d, $Rn.2s",
+ [(set (v1i64 VPR64:$Rd),
+ (v1i64 (Neon_Padd
+ (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
+ NoItinerary>;
+ }
+}
+
+defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
+ int_arm_neon_vpadals>;
+defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
+ int_arm_neon_vpadalu>;
+
+multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
+ def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [], NoItinerary>;
+
+ def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2d",
+ [], NoItinerary>;
+
+ def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8b",
+ [], NoItinerary>;
+
+ def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4h",
+ [], NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [], NoItinerary>;
+}
+
+defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
+defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
+defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
+defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
+
+multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
+ SDPatternOperator Neon_Op> {
+ def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
+ (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
+
+ def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
+ (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
+
+ def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
+ (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
+
+ def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
+ (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
+
+ def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
+ (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
+
+ def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
+ (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
+
+ def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
+ (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
+}
+
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
+
+def : Pat<(v16i8 (sub
+ (v16i8 Neon_AllZero),
+ (v16i8 VPR128:$Rn))),
+ (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (sub
+ (v8i8 Neon_AllZero),
+ (v8i8 VPR64:$Rn))),
+ (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
+def : Pat<(v8i16 (sub
+ (v8i16 (bitconvert (v16i8 Neon_AllZero))),
+ (v8i16 VPR128:$Rn))),
+ (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
+def : Pat<(v4i16 (sub
+ (v4i16 (bitconvert (v8i8 Neon_AllZero))),
+ (v4i16 VPR64:$Rn))),
+ (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
+def : Pat<(v4i32 (sub
+ (v4i32 (bitconvert (v16i8 Neon_AllZero))),
+ (v4i32 VPR128:$Rn))),
+ (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
+def : Pat<(v2i32 (sub
+ (v2i32 (bitconvert (v8i8 Neon_AllZero))),
+ (v2i32 VPR64:$Rn))),
+ (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
+def : Pat<(v2i64 (sub
+ (v2i64 (bitconvert (v16i8 Neon_AllZero))),
+ (v2i64 VPR128:$Rn))),
+ (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
+
+multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
+ let Constraints = "$src = $Rd" in {
+ def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [], NoItinerary>;
+
+ def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2d",
+ [], NoItinerary>;
+
+ def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8b",
+ [], NoItinerary>;
+
+ def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4h",
+ [], NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [], NoItinerary>;
+ }
+}
+
+defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
+defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
+
+multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
+ SDPatternOperator Neon_Op> {
+ def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
+ (v16i8 (!cast<Instruction>(Prefix # 16b)
+ (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
+
+ def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
+ (v8i16 (!cast<Instruction>(Prefix # 8h)
+ (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
+
+ def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
+ (v4i32 (!cast<Instruction>(Prefix # 4s)
+ (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
+
+ def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
+ (v2i64 (!cast<Instruction>(Prefix # 2d)
+ (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
+
+ def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
+ (v8i8 (!cast<Instruction>(Prefix # 8b)
+ (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
+
+ def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
+ (v4i16 (!cast<Instruction>(Prefix # 4h)
+ (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
+
+ def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
+ (v2i32 (!cast<Instruction>(Prefix # 2s)
+ (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
+}
+
+defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
+defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
+
+multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
+ SDPatternOperator Neon_Op> {
+ def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.8h",
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8b",
+ [(set (v8i8 VPR64:$Rd),
+ (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4h",
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
+defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
+
+multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
+ bits<5> Opcode> {
+ def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [], NoItinerary>;
+
+ def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8b",
+ [], NoItinerary>;
+}
+
+defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
+defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
+defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
+
+def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
+ (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
+def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
+ (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
+
+def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
+ (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
+ (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
+
+def : Pat<(v16i8 (xor
+ (v16i8 VPR128:$Rn),
+ (v16i8 Neon_AllOne))),
+ (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (xor
+ (v8i8 VPR64:$Rn),
+ (v8i8 Neon_AllOne))),
+ (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
+def : Pat<(v8i16 (xor
+ (v8i16 VPR128:$Rn),
+ (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
+ (NOT16b VPR128:$Rn)>;
+def : Pat<(v4i16 (xor
+ (v4i16 VPR64:$Rn),
+ (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
+ (NOT8b VPR64:$Rn)>;
+def : Pat<(v4i32 (xor
+ (v4i32 VPR128:$Rn),
+ (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
+ (NOT16b VPR128:$Rn)>;
+def : Pat<(v2i32 (xor
+ (v2i32 VPR64:$Rn),
+ (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
+ (NOT8b VPR64:$Rn)>;
+def : Pat<(v2i64 (xor
+ (v2i64 VPR128:$Rn),
+ (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
+ (NOT16b VPR128:$Rn)>;
+
+def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
+ (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
+ (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
+
+multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
+ SDPatternOperator Neon_Op> {
+ def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4f32 VPR128:$Rd),
+ (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2d",
+ [(set (v2f64 VPR128:$Rd),
+ (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [(set (v2f32 VPR64:$Rd),
+ (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
+defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
+
+multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
+ def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2d",
+ [], NoItinerary>;
+
+ let Constraints = "$Rd = $src" in {
+ def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.16b, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.8h, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.4s, $Rn.2d",
+ [], NoItinerary>;
+ }
+}
+
+defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
+defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
+defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
+defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
+
+multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
+ SDPatternOperator Neon_Op> {
+ def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
+ (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;
+
+ def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
+ (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;
+
+ def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
+ (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;
+
+ def : Pat<(v16i8 (concat_vectors
+ (v8i8 VPR64:$src),
+ (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
+ (!cast<Instruction>(Prefix # 8h16b)
+ (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+ VPR128:$Rn)>;
+
+ def : Pat<(v8i16 (concat_vectors
+ (v4i16 VPR64:$src),
+ (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
+ (!cast<Instruction>(Prefix # 4s8h)
+ (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+ VPR128:$Rn)>;
+
+ def : Pat<(v4i32 (concat_vectors
+ (v2i32 VPR64:$src),
+ (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
+ (!cast<Instruction>(Prefix # 2d4s)
+ (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+ VPR128:$Rn)>;
+}
+
+defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
+defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
+defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
+defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
+
+multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
+ let DecoderMethod = "DecodeSHLLInstruction" in {
+ def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR64:$Rn, uimm_exact8:$Imm),
+ asmop # "\t$Rd.8h, $Rn.8b, $Imm",
+ [], NoItinerary>;
+
+ def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR64:$Rn, uimm_exact16:$Imm),
+ asmop # "\t$Rd.4s, $Rn.4h, $Imm",
+ [], NoItinerary>;
+
+ def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR64:$Rn, uimm_exact32:$Imm),
+ asmop # "\t$Rd.2d, $Rn.2s, $Imm",
+ [], NoItinerary>;
+
+ def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$Rn, uimm_exact8:$Imm),
+ asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
+ [], NoItinerary>;
+
+ def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$Rn, uimm_exact16:$Imm),
+ asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
+ [], NoItinerary>;
+
+ def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$Rn, uimm_exact32:$Imm),
+ asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
+ [], NoItinerary>;
+ }
+}
+
+defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
+
+class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
+ SDPatternOperator ExtOp, Operand Neon_Imm,
+ string suffix>
+ : Pat<(DesTy (shl
+ (DesTy (ExtOp (OpTy VPR64:$Rn))),
+ (DesTy (Neon_vdup
+ (i32 Neon_Imm:$Imm))))),
+ (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
+
+class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
+ SDPatternOperator ExtOp, Operand Neon_Imm,
+ string suffix, PatFrag GetHigh>
+ : Pat<(DesTy (shl
+ (DesTy (ExtOp
+ (OpTy (GetHigh VPR128:$Rn)))),
+ (DesTy (Neon_vdup
+ (i32 Neon_Imm:$Imm))))),
+ (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
+
+def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
+def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
+def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
+def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
+def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
+def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
+def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
+ Neon_High16B>;
+def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
+ Neon_High16B>;
+def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
+ Neon_High8H>;
+def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
+ Neon_High8H>;
+def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
+ Neon_High4S>;
+def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
+ Neon_High4S>;
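+
+// For illustration, these patterns are expected to match a widening shift by
+// exactly the source element width, e.g. (shl (zext v8i8), splat 8), which
+// selects shll v0.8h, v1.8b, #8.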
+
+multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
+ def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2d",
+ [], NoItinerary>;
+
+ let Constraints = "$src = $Rd" in {
+ def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.8h, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.4s, $Rn.2d",
+ [], NoItinerary>;
+ }
+}
+
+defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
+
+multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
+ SDPatternOperator f32_to_f16_Op,
+ SDPatternOperator f64_to_f32_Op> {
+
+ def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
+ (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;
+
+ def : Pat<(v8i16 (concat_vectors
+ (v4i16 VPR64:$src),
+ (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "4s8h")
+ (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+ (v4f32 VPR128:$Rn))>;
+
+ def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
+ (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;
+
+ def : Pat<(v4f32 (concat_vectors
+ (v2f32 VPR64:$src),
+ (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "2d4s")
+ (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+ (v2f64 VPR128:$Rn))>;
+}
+
+defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
+
+multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
+ bits<5> opcode> {
+ def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2d",
+ [], NoItinerary>;
+
+ def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.4s, $Rn.2d",
+ [], NoItinerary> {
+ let Constraints = "$src = $Rd";
+ }
+
+ def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))),
+ (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
+
+ def : Pat<(v4f32 (concat_vectors
+ (v2f32 VPR64:$src),
+ (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "2d4s")
+ (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+ VPR128:$Rn)>;
+}
+
+defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn", "FCVTXN", 0b1, 0b10110>;
+
+def Neon_High4Float : PatFrag<(ops node:$in),
+ (extract_subvector (v4f32 node:$in), (iPTR 2))>;
+
+multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
+ def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4h",
+ [], NoItinerary>;
+
+ def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2s",
+ [], NoItinerary>;
+
+ def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "2\t$Rd.4s, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "2\t$Rd.2d, $Rn.4s",
+ [], NoItinerary>;
+}
+
+defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
+
+multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
+ def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;
+
+ def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
+ (v4i16 (Neon_High8H
+ (v8i16 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;
+
+ def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;
+
+ def : Pat<(v2f64 (fextend
+ (v2f32 (Neon_High4Float
+ (v4f32 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
+}
+
+defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
+
+multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
+ ValueType ResTy4s, ValueType OpTy4s,
+ ValueType ResTy2d, ValueType OpTy2d,
+ ValueType ResTy2s, ValueType OpTy2s,
+ SDPatternOperator Neon_Op> {
+
+ def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (ResTy4s VPR128:$Rd),
+ (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2d",
+ [(set (ResTy2d VPR128:$Rd),
+ (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [(set (ResTy2s VPR64:$Rd),
+ (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
+ bits<5> opcode, SDPatternOperator Neon_Op> {
+ defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
+ v2f64, v2i32, v2f32, Neon_Op>;
+}
+
+defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
+ int_aarch64_neon_fcvtns>;
+defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
+ int_aarch64_neon_fcvtnu>;
+defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
+ int_aarch64_neon_fcvtps>;
+defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
+ int_aarch64_neon_fcvtpu>;
+defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
+ int_aarch64_neon_fcvtms>;
+defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
+ int_aarch64_neon_fcvtmu>;
+defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
+defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
+defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
+ int_aarch64_neon_fcvtas>;
+defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
+ int_aarch64_neon_fcvtau>;
+
+multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
+ bits<5> opcode, SDPatternOperator Neon_Op> {
+ defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
+ v2i64, v2f32, v2i32, Neon_Op>;
+}
+
+defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
+defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
+
+multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
+ bits<5> opcode, SDPatternOperator Neon_Op> {
+ defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
+ v2f64, v2f32, v2f32, Neon_Op>;
+}
+
+defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
+ int_aarch64_neon_frintn>;
+defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
+defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
+defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
+defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
+defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
+defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
+defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
+ int_arm_neon_vrecpe>;
+defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
+ int_arm_neon_vrsqrte>;
+defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
+
+multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
+ bits<5> opcode, SDPatternOperator Neon_Op> {
+ def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
+ int_arm_neon_vrecpe>;
+defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
+ int_arm_neon_vrsqrte>;
+
+// Crypto Class
+class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_AES<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (opnode (v16i8 VPR128:$src),
+ (v16i8 VPR128:$Rn))))],
+                     NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
+def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
+
+class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_AES<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (opnode (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
+def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
+
+class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_SHA<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (opnode (v4i32 VPR128:$src),
+ (v4i32 VPR128:$Rn))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
+ int_arm_neon_sha1su1>;
+def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
+ int_arm_neon_sha256su0>;
+
+class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_SHA<size, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Rn),
+ asmop # "\t$Rd, $Rn",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v1i32 FPR32:$Rn))))],
+ NoItinerary> {
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
+
+class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (opnode (v4i32 VPR128:$src),
+ (v4i32 VPR128:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
+ int_arm_neon_sha1su0>;
+def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
+ int_arm_neon_sha256su1>;
+
+class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs FPR128:$Rd),
+ (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd, $Rn, $Rm.4s",
+ [(set (v4i32 FPR128:$Rd),
+ (v4i32 (opnode (v4i32 FPR128:$src),
+ (v4i32 FPR128:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
+ int_arm_neon_sha256h>;
+def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
+ int_arm_neon_sha256h2>;
+
+class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs FPR128:$Rd),
+ (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd, $Rn, $Rm.4s",
+ [(set (v4i32 FPR128:$Rd),
+ (v4i32 (opnode (v4i32 FPR128:$src),
+ (v1i32 FPR32:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
+def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
+def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
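+
+// For illustration, the qsv forms above take a 128-bit accumulator, a 32-bit
+// scalar and a 4s vector, i.e. assembly along the lines of sha1c q0, s1, v2.4s.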
+
+//
+// Patterns for handling half-precision values
+//
+
+// Convert an f16 value coming in as an i16 value to f32.
+def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
+ (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
+def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
+ (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
+
+def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
+ f32_to_f16 (f32 FPR32:$Rn))))))),
+ (f32 FPR32:$Rn)>;
+
+// Patterns for vector extract of half-precision FP value in i16 storage type
+def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
+ (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
+ (FCVTsh (f16 (DUPhv_H
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ neon_uimm2_bare:$Imm)))>;
+
+def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
+ (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
+ (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
+
+// Patterns for vector insert of half-precision FP value 0 in i16 storage type
+def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
+ (neon_uimm3_bare:$Imm))),
+ (v8i16 (INSELh (v8i16 VPR128:$Rn),
+ (v8i16 (SUBREG_TO_REG (i64 0),
+ (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
+ sub_16)),
+ neon_uimm3_bare:$Imm, 0))>;
+
+def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
+ (neon_uimm2_bare:$Imm))),
+ (v4i16 (EXTRACT_SUBREG
+ (v8i16 (INSELh
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v8i16 (SUBREG_TO_REG (i64 0),
+ (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
+ sub_16)),
+ neon_uimm2_bare:$Imm, 0)),
+ sub_64))>;
+
+// Patterns for vector insert of half-precision FP value in i16 storage type
+def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint
+ (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
+ (neon_uimm3_bare:$Imm))),
+ (v8i16 (INSELh (v8i16 VPR128:$Rn),
+ (v8i16 (SUBREG_TO_REG (i64 0),
+ (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
+ sub_16)),
+ neon_uimm3_bare:$Imm, 0))>;
+
+def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint
+ (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
+ (neon_uimm2_bare:$Imm))),
+ (v4i16 (EXTRACT_SUBREG
+ (v8i16 (INSELh
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v8i16 (SUBREG_TO_REG (i64 0),
+ (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
+ sub_16)),
+ neon_uimm2_bare:$Imm, 0)),
+ sub_64))>;
+
+def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
+ (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
+ (neon_uimm3_bare:$Imm1))),
+ (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
+ neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
+
+// Patterns for vector copy of half-precision FP value in i16 storage type
+def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
+ (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
+ 65535)))))))),
+ (neon_uimm3_bare:$Imm1))),
+ (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
+ neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
+
+def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
+ (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
+ 65535)))))))),
+ (neon_uimm3_bare:$Imm1))),
+ (v4i16 (EXTRACT_SUBREG
+ (v8i16 (INSELh
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+ neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),
+ sub_64))>;
+
-def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>;
-def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v4i16 VPR64:$src), sub_64))>;
-def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v2i32 VPR64:$src), sub_64))>;
-def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v2f32 VPR64:$src), sub_64))>;
-def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v1i64 VPR64:$src), sub_64))>;
-def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v16i8 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v8i16 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v4i32 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v2i64 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v4f32 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v2f64 VPR128:$src), sub_alias))>;
-
-def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
- (v8i8 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
- (v4i16 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
- (v2i32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
- (v2f32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))),
- (v1i64 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
- (v16i8 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
- (v8i16 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
- (v4i32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
- (v2i64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
- (v4f32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
- (v2f64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 7ce5ce3..8cfb968 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -121,7 +121,7 @@ bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()));
+ MCOp = lowerSymbolOperand(MO, getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index b3a81b1..4e2022c 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -18,9 +18,19 @@ def sub_32 : SubRegIndex<32>;
def sub_16 : SubRegIndex<16>;
def sub_8 : SubRegIndex<8>;
-// The VPR registers are handled as sub-registers of FPR equivalents, but
-// they're really the same thing. We give this concept a special index.
-def sub_alias : SubRegIndex<128>;
+// Note: Code depends on these having consecutive numbers.
+def qqsub : SubRegIndex<256, 256>;
+
+def qsub_0 : SubRegIndex<128>;
+def qsub_1 : SubRegIndex<128, 128>;
+def qsub_2 : ComposedSubRegIndex<qqsub, qsub_0>;
+def qsub_3 : ComposedSubRegIndex<qqsub, qsub_1>;
+
+def dsub_0 : SubRegIndex<64>;
+def dsub_1 : SubRegIndex<64, 64>;
+def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
+def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
+def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;
}
// Registers are identified with 5-bit ID numbers.
@@ -137,60 +147,51 @@ foreach Index = 0-31 in {
}
-def FPR8 : RegisterClass<"AArch64", [i8], 8,
+def FPR8 : RegisterClass<"AArch64", [i8, v1i8], 8,
(sequence "B%u", 0, 31)> {
}
-def FPR16 : RegisterClass<"AArch64", [f16], 16,
+def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16,
(sequence "H%u", 0, 31)> {
}
-def FPR32 : RegisterClass<"AArch64", [f32], 32,
+def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32,
(sequence "S%u", 0, 31)> {
}
-def FPR64 : RegisterClass<"AArch64", [f64], 64,
- (sequence "D%u", 0, 31)> {
-}
+def FPR64 : RegisterClass<"AArch64",
+ [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
+ 64, (sequence "D%u", 0, 31)>;
-def FPR128 : RegisterClass<"AArch64", [f128], 128,
- (sequence "Q%u", 0, 31)> {
-}
+def FPR128 : RegisterClass<"AArch64",
+ [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8],
+ 128, (sequence "Q%u", 0, 31)>;
+
+def FPR64Lo : RegisterClass<"AArch64",
+ [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
+ 64, (sequence "D%u", 0, 15)>;
+def FPR128Lo : RegisterClass<"AArch64",
+ [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8],
+ 128, (sequence "Q%u", 0, 15)>;
//===----------------------------------------------------------------------===//
// Vector registers:
//===----------------------------------------------------------------------===//
-// NEON registers simply specify the overall vector, and it's expected that
-// Instructions will individually specify the acceptable data layout. In
-// principle this leaves two approaches open:
-// + An operand, giving a single ADDvvv instruction (for example). This turns
-// out to be unworkable in the assembly parser (without every Instruction
-// having a "cvt" function, at least) because the constraints can't be
-// properly enforced. It also complicates specifying patterns since each
-// instruction will accept many types.
-// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
-// details about NEON registers, but simplifies most other details.
-//
-// The second approach was taken.
-
-foreach Index = 0-31 in {
- def V # Index : AArch64RegWithSubs<Index, "v" # Index,
- [!cast<Register>("Q" # Index)],
- [sub_alias]>,
- DwarfRegNum<[!add(Index, 64)]>;
+def VPR64AsmOperand : AsmOperandClass {
+ let Name = "VPR";
+ let PredicateMethod = "isReg";
+ let RenderMethod = "addRegOperands";
}
-// These two classes contain the same registers, which should be reasonably
-// sensible for MC and allocation purposes, but allows them to be treated
-// separately for things like stack spilling.
-def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64,
- (sequence "V%u", 0, 31)>;
+def VPR64 : RegisterOperand<FPR64, "printVPRRegister">;
-def VPR128 : RegisterClass<"AArch64",
- [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
- (sequence "V%u", 0, 31)>;
+def VPR128 : RegisterOperand<FPR128, "printVPRRegister">;
+
+def VPR64Lo : RegisterOperand<FPR64Lo, "printVPRRegister">;
+
+def VPR128Lo : RegisterOperand<FPR128Lo, "printVPRRegister">;
// Flags register
def NZCV : Register<"nzcv"> {
@@ -201,3 +202,90 @@ def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
let CopyCost = -1;
let isAllocatable = 0;
}
+
+//===----------------------------------------------------------------------===//
+// Consecutive vector registers
+//===----------------------------------------------------------------------===//
+// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D30_D31
+def Tuples2D : RegisterTuples<[dsub_0, dsub_1],
+ [(rotl FPR64, 0), (rotl FPR64, 1)]>;
+
+// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1
+def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2)]>;
+
+// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2
+def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2), (rotl FPR64, 3)]>;
+
+// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31
+def Tuples2Q : RegisterTuples<[qsub_0, qsub_1],
+ [(rotl FPR128, 0), (rotl FPR128, 1)]>;
+
+// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1
+def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2)]>;
+
+// 4 Consecutive 128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2
+def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2), (rotl FPR128, 3)]>;
+
+// The following are super-register classes to model 2/3/4 consecutive
+// 64-bit/128-bit registers.
+
+def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>;
+
+def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> {
+  let Size = 192; // 3 x 64 bits; there is no predefined type of that size.
+}
+
+def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>;
+
+def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>;
+
+def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> {
+  let Size = 384; // 3 x 128 bits; there is no predefined type of that size.
+}
+
+def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>;
+
+
+// The following are vector list operands.
+multiclass VectorList_operands<string PREFIX, string LAYOUT, int Count,
+ RegisterClass RegList> {
+ def _asmoperand : AsmOperandClass {
+ let Name = PREFIX # LAYOUT # Count;
+ let RenderMethod = "addVectorListOperands";
+ let PredicateMethod =
+ "isVectorList<A64Layout::VL_" # LAYOUT # ", " # Count # ">";
+ let ParserMethod = "ParseVectorList";
+ }
+
+ def _operand : RegisterOperand<RegList,
+ "printVectorList<A64Layout::VL_" # LAYOUT # ", " # Count # ">"> {
+ let ParserMatchClass =
+ !cast<AsmOperandClass>(PREFIX # LAYOUT # "_asmoperand");
+ }
+}
+
+multiclass VectorList_BHSD<string PREFIX, int Count, RegisterClass DRegList,
+ RegisterClass QRegList> {
+ defm 8B : VectorList_operands<PREFIX, "8B", Count, DRegList>;
+ defm 4H : VectorList_operands<PREFIX, "4H", Count, DRegList>;
+ defm 2S : VectorList_operands<PREFIX, "2S", Count, DRegList>;
+ defm 1D : VectorList_operands<PREFIX, "1D", Count, DRegList>;
+ defm 16B : VectorList_operands<PREFIX, "16B", Count, QRegList>;
+ defm 8H : VectorList_operands<PREFIX, "8H", Count, QRegList>;
+ defm 4S : VectorList_operands<PREFIX, "4S", Count, QRegList>;
+ defm 2D : VectorList_operands<PREFIX, "2D", Count, QRegList>;
+}
+
+// Vector list operands with 1/2/3/4 registers: VOne8B_operand, ..., VQuad2D_operand
+defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>;
+defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>;
+defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>;
+defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>;
\ No newline at end of file
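As a rough sketch (not part of the patch), this is how the tuple classes above get used by the asm parser later in this diff; the helper name is made up, and the usual LLVM MC and AArch64 MC target headers are assumed:

static unsigned getDPairFor(const llvm::MCRegisterInfo &MRI, unsigned FirstDReg) {
  // Look up the DPair tuple register whose dsub_0 element is FirstDReg,
  // e.g. AArch64::D0 maps to the D0_D1 pair register.
  return MRI.getMatchingSuperReg(FirstDReg, AArch64::dsub_0,
                                 &AArch64MCRegisterClasses[AArch64::DPairRegClassID]);
}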
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index d71bb4e..5c693c1 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -25,11 +25,31 @@
using namespace llvm;
+// Pin the vtable to this file.
+void AArch64Subtarget::anchor() {}
+
AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
- : AArch64GenSubtargetInfo(TT, CPU, FS), HasNEON(false), HasCrypto(false),
- TargetTriple(TT) {
+ : AArch64GenSubtargetInfo(TT, CPU, FS), HasFPARMv8(false), HasNEON(false),
+ HasCrypto(false), TargetTriple(TT), CPUString(CPU) {
+
+ initializeSubtargetFeatures(CPU, FS);
+}
+
+void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU,
+ StringRef FS) {
+ if (CPU.empty())
+ CPUString = "generic";
+
+ std::string FullFS = FS;
+ if (CPUString == "generic") {
+ // Enable FP by default.
+ if (FullFS.empty())
+ FullFS = "+fp-armv8";
+ else
+ FullFS = "+fp-armv8," + FullFS;
+ }
- ParseSubtargetFeatures(CPU, FS);
+ ParseSubtargetFeatures(CPU, FullFS);
}
bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
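For illustration only, a minimal standalone model of the feature-string handling above (hypothetical helper, not the actual method):

#include <string>

// An empty or "generic" CPU always gets +fp-armv8 prepended to the feature string.
static std::string buildFullFS(const std::string &CPU, const std::string &FS) {
  std::string FullFS = FS;
  if (CPU.empty() || CPU == "generic") {
    if (FullFS.empty())
      FullFS = "+fp-armv8";
    else
      FullFS = "+fp-armv8," + FullFS;
  }
  return FullFS;
}
// buildFullFS("generic", "")      -> "+fp-armv8"
// buildFullFS("generic", "+neon") -> "+fp-armv8,+neon"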
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 35a7c8d..bbfd3bc 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -27,18 +27,31 @@ class StringRef;
class GlobalValue;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
+ virtual void anchor();
protected:
+ bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+private:
+ void initializeSubtargetFeatures(StringRef CPU, StringRef FS);
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+ virtual bool enableMachineScheduler() const {
+ return true;
+ }
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -46,11 +59,13 @@ public:
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
- bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
-
bool hasCrypto() const { return HasCrypto; }
+
+ const std::string & getCPUString() const { return CPUString; }
};
} // End llvm namespace
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 43e91ac..fbbce11 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -54,8 +54,9 @@ public:
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
// Initialize the set of available features.
@@ -126,6 +127,11 @@ public:
OperandMatchResultTy
ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout,
+ SMLoc &LayoutLoc);
+
+ OperandMatchResultTy ParseVectorList(SmallVectorImpl<MCParsedAsmOperand *> &);
+
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -153,6 +159,7 @@ private:
k_Immediate, // Including expressions referencing symbols
k_Register,
k_ShiftExtend,
+ k_VectorList, // A sequential list of 1 to 4 registers.
k_SysReg, // The register operand of MRS and MSR instructions
k_Token, // The mnemonic; other raw tokens the auto-generated
k_WrappedRegister // Load/store exclusive permit a wrapped register.
@@ -188,6 +195,13 @@ private:
bool ImplicitAmount;
};
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ A64Layout::VectorLayout Layout;
+ };
+
struct SysRegOp {
const char *Data;
unsigned Length;
@@ -205,6 +219,7 @@ private:
struct ImmOp Imm;
struct RegOp Reg;
struct ShiftExtendOp ShiftExtend;
+ struct VectorListOp VectorList;
struct SysRegOp SysReg;
struct TokOp Tok;
};
@@ -664,6 +679,44 @@ public:
return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
}
+ // if 0 < value <= w, return true
+ bool isShrFixedWidth(int w) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= w;
+ }
+
+ bool isShrImm8() const { return isShrFixedWidth(8); }
+
+ bool isShrImm16() const { return isShrFixedWidth(16); }
+
+ bool isShrImm32() const { return isShrFixedWidth(32); }
+
+ bool isShrImm64() const { return isShrFixedWidth(64); }
+
+ // if 0 <= value < w, return true
+ bool isShlFixedWidth(int w) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < w;
+ }
+
+ bool isShlImm8() const { return isShlFixedWidth(8); }
+
+ bool isShlImm16() const { return isShlFixedWidth(16); }
+
+ bool isShlImm32() const { return isShlFixedWidth(32); }
+
+ bool isShlImm64() const { return isShlFixedWidth(64); }
+
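The two predicate families above reduce to simple range checks; a standalone restatement (a sketch that ignores the MCConstantExpr plumbing) matching the diagnostic ranges added further down in this file:

#include <cstdint>

// Right-shift immediates are valid in [1, W]; left-shift immediates in [0, W-1].
static bool isValidShrImm(int64_t Value, int W) { return Value > 0 && Value <= W; }
static bool isValidShlImm(int64_t Value, int W) { return Value >= 0 && Value < W; }
// e.g. isValidShrImm(8, 8) is true, while isValidShlImm(8, 8) is false.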
bool isNeonMovImmShiftLSL() const {
if (!isShiftOrExtend())
return false;
@@ -697,6 +750,12 @@ public:
return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16;
}
+ template <A64Layout::VectorLayout Layout, unsigned Count>
+ bool isVectorList() const {
+ return Kind == k_VectorList && VectorList.Layout == Layout &&
+ VectorList.Count == Count;
+ }
+
template <int MemSize> bool isSImm7Scaled() const {
if (!isImm())
return false;
@@ -756,6 +815,17 @@ public:
return true;
}
+ // if value == N, return true
+ template<int N>
+ bool isExactImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() == N;
+ }
+
static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
unsigned ShiftAmount,
bool ImplicitAmount,
@@ -817,6 +887,18 @@ public:
return Op;
}
+ static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
+ A64Layout::VectorLayout Layout,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.Layout = Layout;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
Op->Tok.Data = Str.data();
@@ -1164,6 +1246,11 @@ public:
}
Inst.addOperand(MCOperand::CreateImm(Imm));
}
+
+ void addVectorListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ }
};
} // end anonymous namespace.
@@ -1203,7 +1290,6 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
else
return MatchOperand_Success;
}
-
// ... or it might be a symbolish thing
}
// Fall through
@@ -1247,7 +1333,7 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return ParseOperand(Operands, Mnemonic);
}
// The following will likely be useful later, but not in very early cases
- case AsmToken::LCurly: // Weird SIMD lists
+ case AsmToken::LCurly: // SIMD vector list is not parsed here
llvm_unreachable("Don't know how to deal with '{' in operand");
return MatchOperand_ParseFail;
}
@@ -1405,7 +1491,7 @@ AArch64AsmParser::ParseImmWithLSLOperand(
// The optional operand must be "lsl #N" where N is non-negative.
if (Parser.getTok().is(AsmToken::Identifier)
- && Parser.getTok().getIdentifier().lower() == "lsl") {
+ && Parser.getTok().getIdentifier().equals_lower("lsl")) {
Parser.Lex();
if (Parser.getTok().is(AsmToken::Hash)) {
@@ -1462,9 +1548,8 @@ AArch64AsmParser::ParseCRxOperand(
return MatchOperand_ParseFail;
}
- std::string LowerTok = Parser.getTok().getIdentifier().lower();
- StringRef Tok(LowerTok);
- if (Tok[0] != 'c') {
+ StringRef Tok = Parser.getTok().getIdentifier();
+ if (Tok[0] != 'c' && Tok[0] != 'C') {
Error(S, "Expected cN operand where 0 <= N <= 15");
return MatchOperand_ParseFail;
}
@@ -1536,22 +1621,11 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
std::string LowerReg = Tok.getString().lower();
size_t DotPos = LowerReg.find('.');
- RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
- if (RegNum == AArch64::NoRegister) {
- RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
- .Case("ip0", AArch64::X16)
- .Case("ip1", AArch64::X17)
- .Case("fp", AArch64::X29)
- .Case("lr", AArch64::X30)
- .Default(AArch64::NoRegister);
- }
- if (RegNum == AArch64::NoRegister)
- return false;
-
+ bool IsVec128 = false;
SMLoc S = Tok.getLoc();
RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
- if (DotPos == StringRef::npos) {
+ if (DotPos == std::string::npos) {
Layout = StringRef();
} else {
// Everything afterwards needs to be a literal token, expected to be
@@ -1561,20 +1635,78 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
// gives us a permanent string to use in the token (a pointer into LowerReg
// would go out of scope when we return).
LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
- std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
+ StringRef LayoutText = StringRef(LowerReg).substr(DotPos);
+
+ // See if it's a 128-bit layout first.
Layout = StringSwitch<const char *>(LayoutText)
- .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
- .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
- .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
- .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
+ .Case(".q", ".q").Case(".1q", ".1q")
+ .Case(".d", ".d").Case(".2d", ".2d")
+ .Case(".s", ".s").Case(".4s", ".4s")
+ .Case(".h", ".h").Case(".8h", ".8h")
+ .Case(".b", ".b").Case(".16b", ".16b")
.Default("");
+ if (Layout.size() != 0)
+ IsVec128 = true;
+ else {
+ Layout = StringSwitch<const char *>(LayoutText)
+ .Case(".1d", ".1d")
+ .Case(".2s", ".2s")
+ .Case(".4h", ".4h")
+ .Case(".8b", ".8b")
+ .Default("");
+ }
+
if (Layout.size() == 0) {
- // Malformed register
+ // If we've still not pinned it down the register is malformed.
return false;
}
}
+ RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
+ if (RegNum == AArch64::NoRegister) {
+ RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
+ .Case("ip0", AArch64::X16)
+ .Case("ip1", AArch64::X17)
+ .Case("fp", AArch64::X29)
+ .Case("lr", AArch64::X30)
+ .Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0)
+ .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1)
+ .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2)
+ .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3)
+ .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4)
+ .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5)
+ .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6)
+ .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7)
+ .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8)
+ .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9)
+ .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10)
+ .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11)
+ .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12)
+ .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13)
+ .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14)
+ .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15)
+ .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16)
+ .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17)
+ .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18)
+ .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19)
+ .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20)
+ .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21)
+ .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22)
+ .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23)
+ .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24)
+ .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25)
+ .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26)
+ .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27)
+ .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28)
+ .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29)
+ .Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30)
+ .Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31)
+ .Default(AArch64::NoRegister);
+ }
+ if (RegNum == AArch64::NoRegister)
+ return false;
+
return true;
}
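The layout suffix now decides whether a v-register name resolves to its D (64-bit) or Q (128-bit) alias; a standalone sketch of that decision, with the suffix set copied from the StringSwitch above:

#include <set>
#include <string>

static bool layoutIs128Bit(const std::string &Layout) {
  // Suffixes treated as 128-bit above; ".1d", ".2s", ".4h" and ".8b" fall
  // through to the 64-bit case.
  static const std::set<std::string> Q128 = {".q", ".1q", ".d", ".2d", ".s",
                                             ".4s", ".h", ".8h", ".b", ".16b"};
  return Q128.count(Layout) != 0;
}
// With this, "v0.16b" resolves to AArch64::Q0 while "v0.8b" resolves to AArch64::D0.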
@@ -1606,6 +1738,7 @@ AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
case 'h': NumLanes = 8; break;
case 's': NumLanes = 4; break;
case 'd': NumLanes = 2; break;
+ case 'q': NumLanes = 1; break;
}
}
@@ -1824,6 +1957,148 @@ AArch64AsmParser::ParseShiftExtend(
return MatchOperand_Success;
}
+/// Try to parse a vector register token. If it is a vector register, the
+/// token is eaten and true is returned; otherwise false is returned.
+bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc,
+ StringRef &Layout, SMLoc &LayoutLoc) {
+ bool IsVector = true;
+
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+ IsVector = false;
+ else if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID]
+ .contains(RegNum) &&
+ !AArch64MCRegisterClasses[AArch64::FPR128RegClassID]
+ .contains(RegNum))
+ IsVector = false;
+ else if (Layout.size() == 0)
+ IsVector = false;
+
+ if (!IsVector)
+ Error(Parser.getTok().getLoc(), "expected vector type register");
+
+ Parser.Lex(); // Eat this token.
+ return IsVector;
+}
+
+
+// A vector list contains 1 to 4 consecutive registers.
+// When the list holds more than one vector, two forms are accepted:
+//   (1) {Vn.layout, Vn+1.layout, ..., Vm.layout}
+//   (2) {Vn.layout - Vm.layout}
+// If the layout is .b/.h/.s/.d, a lane index may also be parsed.
+AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ if (Parser.getTok().isNot(AsmToken::LCurly)) {
+ Error(Parser.getTok().getLoc(), "'{' expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc SLoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '{' token.
+
+ unsigned Reg, Count = 1;
+ StringRef LayoutStr;
+ SMLoc RegEndLoc, LayoutLoc;
+ if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc))
+ return MatchOperand_ParseFail;
+
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the minus.
+
+ unsigned Reg2;
+ StringRef LayoutStr2;
+ SMLoc RegEndLoc2, LayoutLoc2;
+ SMLoc RegLoc2 = Parser.getTok().getLoc();
+
+ if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
+ return MatchOperand_ParseFail;
+ unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg);
+
+ if (LayoutStr != LayoutStr2) {
+ Error(LayoutLoc2, "expected the same vector layout");
+ return MatchOperand_ParseFail;
+ }
+ if (Space == 0 || Space > 3) {
+ Error(RegLoc2, "invalid number of vectors");
+ return MatchOperand_ParseFail;
+ }
+
+ Count += Space;
+ } else {
+ unsigned LastReg = Reg;
+ while (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ unsigned Reg2;
+ StringRef LayoutStr2;
+ SMLoc RegEndLoc2, LayoutLoc2;
+ SMLoc RegLoc2 = Parser.getTok().getLoc();
+
+ if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
+ return MatchOperand_ParseFail;
+ unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg)
+ : (Reg2 + 32 - LastReg);
+ Count++;
+
+ // Consecutive registers must be exactly one apart and share the same layout,
+ // and the total count must not be greater than 4.
+ if (Space != 1) {
+ Error(RegLoc2, "invalid space between two vectors");
+ return MatchOperand_ParseFail;
+ }
+ if (LayoutStr != LayoutStr2) {
+ Error(LayoutLoc2, "expected the same vector layout");
+ return MatchOperand_ParseFail;
+ }
+ if (Count > 4) {
+ Error(RegLoc2, "invalid number of vectors");
+ return MatchOperand_ParseFail;
+ }
+
+ LastReg = Reg2;
+ }
+ }
+
+ if (Parser.getTok().isNot(AsmToken::RCurly)) {
+ Error(Parser.getTok().getLoc(), "'}' expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc ELoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '}' token.
+
+ A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr);
+ if (Count > 1) { // If count > 1, create vector list using super register.
+ bool IsVec64 = (Layout < A64Layout::VL_16B);
+ static unsigned SupRegIDs[3][2] = {
+ { AArch64::QPairRegClassID, AArch64::DPairRegClassID },
+ { AArch64::QTripleRegClassID, AArch64::DTripleRegClassID },
+ { AArch64::QQuadRegClassID, AArch64::DQuadRegClassID }
+ };
+ unsigned SupRegID = SupRegIDs[Count - 2][static_cast<int>(IsVec64)];
+ unsigned Sub0 = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+ Reg = MRI->getMatchingSuperReg(Reg, Sub0,
+ &AArch64MCRegisterClasses[SupRegID]);
+ }
+ Operands.push_back(
+ AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc));
+
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ uint32_t NumLanes = 0;
+ switch(Layout) {
+ case A64Layout::VL_B : NumLanes = 16; break;
+ case A64Layout::VL_H : NumLanes = 8; break;
+ case A64Layout::VL_S : NumLanes = 4; break;
+ case A64Layout::VL_D : NumLanes = 2; break;
+ default:
+ SMLoc Loc = getLexer().getLoc();
+ Error(Loc, "expected comma before next operand");
+ return MatchOperand_ParseFail;
+ }
+ return ParseNEONLane(Operands, NumLanes);
+ } else {
+ return MatchOperand_Success;
+ }
+}
+
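A standalone sketch of the super-register class selection above: for example, "{v2.4s, v3.4s, v4.4s}" has Count == 3 with a 128-bit layout, so the QTriple class is chosen and getMatchingSuperReg maps Q2 to the Q2_Q3_Q4 tuple register. The plain-string model below only mirrors the SupRegIDs table:

static const char *supClassFor(unsigned Count, bool IsVec64) {
  // Rows are Count - 2 (pair/triple/quad); column 0 is the Q class, column 1 the D class.
  static const char *Names[3][2] = {
      {"QPair",   "DPair"},
      {"QTriple", "DTriple"},
      {"QQuad",   "DQuad"},
  };
  return Names[Count - 2][IsVec64 ? 1 : 0];
}
// supClassFor(3, /*IsVec64=*/false) == "QTriple"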
// FIXME: We would really like to be able to tablegen'erate this.
bool AArch64AsmParser::
validateInstruction(MCInst &Inst,
@@ -2240,6 +2515,30 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_Width64:
return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
"expected integer in range [<lsb>, 63]");
+ case Match_ShrImm8:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 8]");
+ case Match_ShrImm16:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 16]");
+ case Match_ShrImm32:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 32]");
+ case Match_ShrImm64:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 64]");
+ case Match_ShlImm8:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 7]");
+ case Match_ShlImm16:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15]");
+ case Match_ShlImm32:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 31]");
+ case Match_ShlImm64:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 63]");
}
llvm_unreachable("Implement any new match types added!");
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index a88a8e8..be4d7f2 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -82,15 +82,38 @@ static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR128LoRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
unsigned OptionHiS,
@@ -113,6 +136,30 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
unsigned FullImm,
@@ -183,6 +230,17 @@ static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
static bool Check(DecodeStatus &Out, DecodeStatus In);
@@ -331,6 +389,14 @@ DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus
+DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder);
+}
static DecodeStatus
DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
@@ -343,28 +409,80 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- if (RegNo > 31)
+static DecodeStatus
+DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
return MCDisassembler::Fail;
- uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
+ return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 30)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64noxzrRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Register));
return MCDisassembler::Success;
}
-static DecodeStatus
-DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo,
+ unsigned RegID,
+ const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
- uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
+ uint16_t Register = getReg(Decoder, RegID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Register));
return MCDisassembler::Success;
}
+static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID,
+ Decoder);
+}
+
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
unsigned OptionHiS,
uint64_t Address,
@@ -413,7 +531,73 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(8 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(16 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(32 - Val));
+ return MCDisassembler::Success;
+}
+static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 7)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 15)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 63)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
@@ -570,11 +754,11 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
if (IsToVec) {
- DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
} else {
DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
- DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
}
// Add the lane
@@ -838,3 +1022,551 @@ DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
return MCDisassembler::Success;
}
+
+// Decode post-index vector load/store instructions.
+// This is necessary as we need to decode Rm: if Rm == 0b11111, the last
+// operand is an immediate equal to the length of the vector list in bytes;
+// otherwise Rm is decoded as a GPR64noxzr register.
+static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rm = fieldFromInstruction(Insn, 16, 5);
+ unsigned Opcode = fieldFromInstruction(Insn, 12, 4);
+ unsigned IsLoad = fieldFromInstruction(Insn, 22, 1);
+ // 0 for a 64-bit vector list, 1 for a 128-bit vector list
+ unsigned Is128BitVec = fieldFromInstruction(Insn, 30, 1);
+
+ unsigned NumVecs;
+ switch (Opcode) {
+ case 0: // ld4/st4
+ case 2: // ld1/st1 with 4 vectors
+ NumVecs = 4; break;
+ case 4: // ld3/st3
+ case 6: // ld1/st1 with 3 vectors
+ NumVecs = 3; break;
+ case 7: // ld1/st1 with 1 vector
+ NumVecs = 1; break;
+ case 8: // ld2/st2
+ case 10: // ld1/st1 with 2 vectors
+ NumVecs = 2; break;
+ default:
+ llvm_unreachable("Invalid opcode for post-index load/store instructions");
+ }
+
+ // Decode vector list of 1/2/3/4 vectors for load instructions.
+ if (IsLoad) {
+ switch (NumVecs) {
+ case 1:
+ Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ // Decode the write-back register, which is equal to Rn.
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+ if (Rm == 31) // If Rm is 0b11111, add the vector list length in bytes
+ Inst.addOperand(MCOperand::CreateImm(NumVecs * (Is128BitVec ? 16 : 8)));
+ else // Decode Rm
+ DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+
+ // Decode the vector list of 1/2/3/4 vectors for store instructions.
+ if (!IsLoad) {
+ switch (NumVecs) {
+ case 1:
+ Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ return MCDisassembler::Success;
+}
+
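When Rm is 0b11111 the post-increment becomes an immediate equal to the size of the register list; a standalone sketch of that arithmetic (the example instruction is illustrative):

// Post-increment in bytes synthesised by the decoder when Rm == 0b11111.
static unsigned postIncBytes(unsigned NumVecs, bool Is128BitVec) {
  return NumVecs * (Is128BitVec ? 16u : 8u);
}
// e.g. "ld1 {v0.4s, v1.4s}, [x0], #32": NumVecs = 2, 128-bit list -> 2 * 16 = 32.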
+// Decode post-index vector load/store lane instructions.
+// This is necessary as we need to decode Rm: if Rm == 0b11111, the last
+// operand is an immediate equal to the number of bytes transferred;
+// otherwise Rm is decoded as a GPR64noxzr register.
+static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ bool Is64bitVec = false;
+ bool IsLoadDup = false;
+ bool IsLoad = false;
+ // The total number of bytes transferred.
+ // TransferBytes = NumVecs * OneLaneBytes
+ unsigned TransferBytes = 0;
+ unsigned NumVecs = 0;
+ unsigned Opc = Inst.getOpcode();
+ switch (Opc) {
+ case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register:
+ case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register:
+ case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register:
+ case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: {
+ switch (Opc) {
+ case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register:
+ TransferBytes = 1; break;
+ case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register:
+ TransferBytes = 2; break;
+ case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register:
+ TransferBytes = 4; break;
+ case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register:
+ TransferBytes = 8; break;
+ }
+ Is64bitVec = true;
+ IsLoadDup = true;
+ NumVecs = 1;
+ break;
+ }
+
+ case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register:
+ case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register:
+ case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register:
+ case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: {
+ switch (Opc) {
+ case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register:
+ TransferBytes = 1; break;
+ case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register:
+ TransferBytes = 2; break;
+ case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register:
+ TransferBytes = 4; break;
+ case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register:
+ TransferBytes = 8; break;
+ }
+ IsLoadDup = true;
+ NumVecs = 1;
+ break;
+ }
+
+ case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register:
+ case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register:
+ case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register:
+ case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: {
+ switch (Opc) {
+ case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register:
+ TransferBytes = 2; break;
+ case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register:
+ TransferBytes = 4; break;
+ case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register:
+ TransferBytes = 8; break;
+ case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register:
+ TransferBytes = 16; break;
+ }
+ Is64bitVec = true;
+ IsLoadDup = true;
+ NumVecs = 2;
+ break;
+ }
+
+ case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register:
+ case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register:
+ case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register:
+ case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: {
+ switch (Opc) {
+ case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register:
+ TransferBytes = 2; break;
+ case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register:
+ TransferBytes = 4; break;
+ case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register:
+ TransferBytes = 8; break;
+ case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register:
+ TransferBytes = 16; break;
+ }
+ IsLoadDup = true;
+ NumVecs = 2;
+ break;
+ }
+
+ case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register:
+ case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register:
+ case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register:
+ case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: {
+ switch (Opc) {
+ case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register:
+ TransferBytes = 3; break;
+ case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register:
+ TransferBytes = 6; break;
+ case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register:
+ TransferBytes = 12; break;
+ case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register:
+ TransferBytes = 24; break;
+ }
+ Is64bitVec = true;
+ IsLoadDup = true;
+ NumVecs = 3;
+ break;
+ }
+
+ case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register:
+ case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_8H_register:
+ case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_4S_register:
+ case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: {
+ switch (Opc) {
+ case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register:
+ TransferBytes = 3; break;
+ case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_8H_register:
+ TransferBytes = 6; break;
+ case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_4S_register:
+ TransferBytes = 12; break;
+ case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register:
+ TransferBytes = 24; break;
+ }
+ IsLoadDup = true;
+ NumVecs = 3;
+ break;
+ }
+
+ case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register:
+ case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register:
+ case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register:
+ case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: {
+ switch (Opc) {
+ case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register:
+ TransferBytes = 4; break;
+ case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register:
+ TransferBytes = 8; break;
+ case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register:
+ TransferBytes = 16; break;
+ case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register:
+ TransferBytes = 32; break;
+ }
+ Is64bitVec = true;
+ IsLoadDup = true;
+ NumVecs = 4;
+ break;
+ }
+
+ case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register:
+ case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_8H_register:
+ case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_4S_register:
+ case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: {
+ switch (Opc) {
+ case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register:
+ TransferBytes = 4; break;
+ case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_8H_register:
+ TransferBytes = 8; break;
+ case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_4S_register:
+ TransferBytes = 16; break;
+ case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register:
+ TransferBytes = 32; break;
+ }
+ IsLoadDup = true;
+ NumVecs = 4;
+ break;
+ }
+
+ case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register:
+ case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register:
+ case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register:
+ case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register:
+ TransferBytes = 1; break;
+ case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register:
+ TransferBytes = 2; break;
+ case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register:
+ TransferBytes = 4; break;
+ case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register:
+ TransferBytes = 8; break;
+ }
+ IsLoad = true;
+ NumVecs = 1;
+ break;
+ }
+
+ case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register:
+ case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register:
+ case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register:
+ case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register:
+ TransferBytes = 2; break;
+ case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register:
+ TransferBytes = 4; break;
+ case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register:
+ TransferBytes = 8; break;
+ case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register:
+ TransferBytes = 16; break;
+ }
+ IsLoad = true;
+ NumVecs = 2;
+ break;
+ }
+
+ case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register:
+ case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register:
+ case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register:
+ case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register:
+ TransferBytes = 3; break;
+ case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register:
+ TransferBytes = 6; break;
+ case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register:
+ TransferBytes = 12; break;
+ case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register:
+ TransferBytes = 24; break;
+ }
+ IsLoad = true;
+ NumVecs = 3;
+ break;
+ }
+
+ case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register:
+ case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register:
+ case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register:
+ case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register:
+ TransferBytes = 4; break;
+ case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register:
+ TransferBytes = 8; break;
+ case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register:
+ TransferBytes = 16; break;
+ case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register:
+ TransferBytes = 32; break;
+ }
+ IsLoad = true;
+ NumVecs = 4;
+ break;
+ }
+
+ case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register:
+ case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register:
+ case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register:
+ case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register:
+ TransferBytes = 1; break;
+ case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register:
+ TransferBytes = 2; break;
+ case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register:
+ TransferBytes = 4; break;
+ case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register:
+ TransferBytes = 8; break;
+ }
+ NumVecs = 1;
+ break;
+ }
+
+ case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register:
+ case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register:
+ case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register:
+ case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register:
+ TransferBytes = 2; break;
+ case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register:
+ TransferBytes = 4; break;
+ case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register:
+ TransferBytes = 8; break;
+ case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register:
+ TransferBytes = 16; break;
+ }
+ NumVecs = 2;
+ break;
+ }
+
+ case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register:
+ case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register:
+ case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register:
+ case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register:
+ TransferBytes = 3; break;
+ case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register:
+ TransferBytes = 6; break;
+ case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register:
+ TransferBytes = 12; break;
+ case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register:
+ TransferBytes = 24; break;
+ }
+ NumVecs = 3;
+ break;
+ }
+
+ case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register:
+ case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register:
+ case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register:
+ case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register:
+ TransferBytes = 4; break;
+ case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register:
+ TransferBytes = 8; break;
+ case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register:
+ TransferBytes = 16; break;
+ case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register:
+ TransferBytes = 32; break;
+ }
+ NumVecs = 4;
+ break;
+ }
+
+ default:
+ return MCDisassembler::Fail;
+ } // End of switch (Opc)
+
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rm = fieldFromInstruction(Insn, 16, 5);
+
+ // Decode post-index of load duplicate lane
+ if (IsLoadDup) {
+ switch (NumVecs) {
+ case 1:
+ Is64bitVec ? DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ Is64bitVec ? DecodeDPairRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ Is64bitVec ? DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ Is64bitVec ? DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
+ }
+
+ // Decode the write-back register, which is equal to Rn.
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+ if (Rm == 31) // If Rm is 0b11111, add the number of transferred bytes
+ Inst.addOperand(MCOperand::CreateImm(TransferBytes));
+ else // Decode Rm
+ DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+
+ return MCDisassembler::Success;
+ }
+
+ // Decode post-index of load/store lane
+ // Loads have a vector list as output.
+ if (IsLoad) {
+ switch (NumVecs) {
+ case 1:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
+ }
+ }
+
+ // Decode the write-back register, which is equal to Rn.
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+ if (Rm == 31) // If Rm is 0b11111, add the number of transferred bytes
+ Inst.addOperand(MCOperand::CreateImm(TransferBytes));
+ else // Decode Rm
+ DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+
+ // Decode the source vector list.
+ switch (NumVecs) {
+ case 1:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
+ }
+
+ // Decode lane
+ unsigned Q = fieldFromInstruction(Insn, 30, 1);
+ unsigned S = fieldFromInstruction(Insn, 10, 3);
+ unsigned lane = 0;
+ // Calculate the number of lanes from the number of vectors and the
+ // transferred bytes: NumLanes = 16 bytes / bytes per lane.
+ unsigned NumLanes = 16 / (TransferBytes / NumVecs);
+ switch (NumLanes) {
+ case 16: // A vector has 16 lanes, each lane is 1 byte.
+ lane = (Q << 3) | S;
+ break;
+ case 8:
+ lane = (Q << 2) | (S >> 1);
+ break;
+ case 4:
+ lane = (Q << 1) | (S >> 2);
+ break;
+ case 2:
+ lane = Q;
+ break;
+ }
+ Inst.addOperand(MCOperand::CreateImm(lane));
+
+ return MCDisassembler::Success;
+}
+
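A standalone sketch of the lane-index reconstruction above, driven by the Q and S fields and the per-lane size:

static unsigned decodeLane(unsigned Q, unsigned S, unsigned TransferBytes,
                           unsigned NumVecs) {
  unsigned NumLanes = 16 / (TransferBytes / NumVecs); // 16-byte register / lane size
  switch (NumLanes) {
  case 16: return (Q << 3) | S;        // byte lanes
  case 8:  return (Q << 2) | (S >> 1); // halfword lanes
  case 4:  return (Q << 1) | (S >> 2); // word lanes
  default: return Q;                   // doubleword lanes
  }
}
// e.g. an LD2 of .h lanes has TransferBytes = 4 and NumVecs = 2, so NumLanes = 8,
// and Q = 1, S = 0b010 reconstructs lane index 5.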
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned size = fieldFromInstruction(Insn, 22, 2);
+ unsigned Q = fieldFromInstruction(Insn, 30, 1);
+
+ DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
+
+ if(Q)
+ DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
+ else
+ DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder);
+
+ switch (size) {
+ case 0:
+ Inst.addOperand(MCOperand::CreateImm(8));
+ break;
+ case 1:
+ Inst.addOperand(MCOperand::CreateImm(16));
+ break;
+ case 2:
+ Inst.addOperand(MCOperand::CreateImm(32));
+ break;
+ default :
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+}
+
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index b624331..0438de3 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -368,6 +368,14 @@ AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
O << "#" << (Imm * MemScale);
}
+void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNo).getReg();
+ std::string Name = getRegisterName(Reg);
+ Name[0] = 'v';
+ O << Name;
+}
+
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
@@ -454,8 +462,8 @@ void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum,
o << "#0x0";
}
-void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MOUImm = MI->getOperand(OpNum);
assert(MOUImm.isImm() &&
@@ -467,6 +475,18 @@ void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
O.write_hex(Imm);
}
+void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MOUImm = MI->getOperand(OpNum);
+
+ assert(MOUImm.isImm()
+ && "Immediate operand required for Neon vector immediate inst.");
+
+ unsigned Imm = MOUImm.getImm();
+ O << Imm;
+}
+
void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -487,3 +507,33 @@ void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
O << "#0x";
O.write_hex(Mask);
}
+
+// If Count > 1, there are two valid forms of vector list:
+//   (1) {Vn.layout, Vn+1.layout, ..., Vm.layout}
+//   (2) {Vn.layout - Vm.layout}
+// The first form is used for output.
+template <A64Layout::VectorLayout Layout, unsigned Count>
+void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors");
+
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ std::string LayoutStr = A64VectorLayoutToString(Layout);
+ O << "{";
+ if (Count > 1) { // Print sub registers separately
+ bool IsVec64 = (Layout < A64Layout::VL_16B);
+ unsigned SubRegIdx = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
+ for (unsigned I = 0; I < Count; I++) {
+ std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++));
+ Name[0] = 'v';
+ O << Name << LayoutStr;
+ if (I != Count - 1)
+ O << ", ";
+ }
+ } else { // Print the register directly when NumVecs is 1.
+ std::string Name = getRegisterName(Reg);
+ Name[0] = 'v';
+ O << Name << LayoutStr;
+ }
+ O << "}";
+}
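A standalone sketch of the expanded output form chosen above (it ignores the wrap-around case where a list crosses v31):

#include <sstream>
#include <string>

static std::string formatVectorList(unsigned FirstIdx, unsigned Count,
                                    const std::string &Layout) {
  std::ostringstream OS;
  OS << "{";
  for (unsigned I = 0; I != Count; ++I) {
    OS << "v" << (FirstIdx + I) << Layout;
    if (I != Count - 1)
      OS << ", ";
  }
  OS << "}";
  return OS.str();
}
// formatVectorList(2, 2, ".4s") -> "{v2.4s, v3.4s}"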
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index f7439be..37b7273 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -157,6 +157,7 @@ public:
void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
+ void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
@@ -168,9 +169,13 @@ public:
void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+
+ template <A64Layout::VectorLayout Layout, unsigned Count>
+ void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
};
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index a3373b1..8a9077c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -578,8 +578,8 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
}
MCAsmBackend *
-llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+llvm::createAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
-
return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS());
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 104e4d2..a64c463 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -55,11 +55,10 @@ namespace {
/// by MachO. Beware!
class AArch64ELFStreamer : public MCELFStreamer {
public:
- AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCELFStreamer(Context, TAB, OS, Emitter),
- MappingSymbolCounter(0), LastEMS(EMS_None) {
- }
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, 0, TAB, OS, Emitter), MappingSymbolCounter(0),
+ LastEMS(EMS_None) {}
~AArch64ELFStreamer() {}
@@ -129,7 +128,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 8ec8cbf..add874c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -31,11 +31,12 @@ AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() {
UseDataRegionDirectives = true;
- WeakRefDirective = "\t.weak\t";
-
HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
}
+
+// Pin the vtable to this file.
+void AArch64ELFMCAsmInfo::anchor() {}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index a20bc47..d1dd285 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -14,13 +14,15 @@
#ifndef LLVM_AARCH64TARGETASMINFO_H
#define LLVM_AARCH64TARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- struct AArch64ELFMCAsmInfo : public MCAsmInfo {
- explicit AArch64ELFMCAsmInfo();
- };
+struct AArch64ELFMCAsmInfo : public MCAsmInfoELF {
+ explicit AArch64ELFMCAsmInfo();
+private:
+ virtual void anchor();
+};
} // namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index b9770b3..b41c566 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -59,6 +59,23 @@ public:
unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm8(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm16(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm32(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm64(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
// Labels are handled mostly the same way: a symbol is needed, and
// just gets some fixup attached.
@@ -310,6 +327,45 @@ AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
}
+unsigned AArch64MCCodeEmitter::getShiftRightImm8(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm16(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm32(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm64(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm8(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 8;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm16(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 16;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm32(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 32;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm64(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 64;
+}
template<AArch64::Fixups fixupDesired> unsigned
AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
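
The new getShiftRightImm*/getShiftLeftImm* hooks above are pure arithmetic on the instruction operand: right-shift immediates are stored as the element size minus the shift, and the left-shift variants store the operand minus the element size. A standalone restatement of the right-shift case (an illustrative sketch, not LLVM API):

    // For an N-bit element, a vector shift-right by S is encoded as N - S,
    // mirroring getShiftRightImm8/16/32/64 above; e.g. "#3" on an 8-bit
    // element is stored as 8 - 3 = 5.
    static unsigned encodeShiftRightImm(unsigned ElemBits, unsigned Shift) {
      return ElemBits - Shift;
    }
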
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 3849fe3..670e657 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -43,8 +43,9 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
-MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createAArch64AsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
} // End llvm namespace
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index e675efc..ce970b0 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -306,6 +306,65 @@ namespace A64SE {
};
}
+namespace A64Layout {
+ enum VectorLayout {
+ Invalid = -1,
+ VL_8B,
+ VL_4H,
+ VL_2S,
+ VL_1D,
+
+ VL_16B,
+ VL_8H,
+ VL_4S,
+ VL_2D,
+
+ // Bare layout for the 128-bit vector
+ // (only show ".b", ".h", ".s", ".d" without vector number)
+ VL_B,
+ VL_H,
+ VL_S,
+ VL_D
+ };
+}
+
+inline static const char *
+A64VectorLayoutToString(A64Layout::VectorLayout Layout) {
+ switch (Layout) {
+ case A64Layout::VL_8B: return ".8b";
+ case A64Layout::VL_4H: return ".4h";
+ case A64Layout::VL_2S: return ".2s";
+ case A64Layout::VL_1D: return ".1d";
+ case A64Layout::VL_16B: return ".16b";
+ case A64Layout::VL_8H: return ".8h";
+ case A64Layout::VL_4S: return ".4s";
+ case A64Layout::VL_2D: return ".2d";
+ case A64Layout::VL_B: return ".b";
+ case A64Layout::VL_H: return ".h";
+ case A64Layout::VL_S: return ".s";
+ case A64Layout::VL_D: return ".d";
+ default: llvm_unreachable("Unknown Vector Layout");
+ }
+}
+
+inline static A64Layout::VectorLayout
+A64StringToVectorLayout(StringRef LayoutStr) {
+ return StringSwitch<A64Layout::VectorLayout>(LayoutStr)
+ .Case(".8b", A64Layout::VL_8B)
+ .Case(".4h", A64Layout::VL_4H)
+ .Case(".2s", A64Layout::VL_2S)
+ .Case(".1d", A64Layout::VL_1D)
+ .Case(".16b", A64Layout::VL_16B)
+ .Case(".8h", A64Layout::VL_8H)
+ .Case(".4s", A64Layout::VL_4S)
+ .Case(".2d", A64Layout::VL_2D)
+ .Case(".b", A64Layout::VL_B)
+ .Case(".h", A64Layout::VL_H)
+ .Case(".s", A64Layout::VL_S)
+ .Case(".d", A64Layout::VL_D)
+ .Default(A64Layout::Invalid);
+}
+
namespace A64SysReg {
enum SysRegROValues {
MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
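
The A64Layout helpers above give a simple string round-trip for NEON register suffixes. A minimal usage sketch (assumes AArch64BaseInfo.h is included; illustrative only):

    // ".4s" parses to VL_4S, and printing it recovers the same suffix.
    A64Layout::VectorLayout L = A64StringToVectorLayout(".4s");
    assert(L == A64Layout::VL_4S);
    assert(StringRef(A64VectorLayoutToString(L)) == ".4s");

    // Unknown suffixes map to A64Layout::Invalid rather than asserting.
    assert(A64StringToVectorLayout(".3h") == A64Layout::Invalid);
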
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index e8c2f7c..ff585b4 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -657,6 +657,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
Modified = true;
for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
+ // Make sure to constrain the register class of the new register to
+ // match what we're replacing. Otherwise we can optimize a DPR_VFP2
+ // reference into a plain DPR, and that will end poorly. NewReg is
+ // always virtual here, so there will always be a matching subclass
+ // to find.
+ MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
+
DEBUG(dbgs() << "Replacing operand "
<< **I << " with "
<< PrintReg(NewReg) << "\n");
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index e5da3a5..36e5680 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -45,7 +45,7 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
"Enable VFP4 instructions",
[FeatureVFP3, FeatureFP16]>;
-def FeatureV8FP : SubtargetFeature<"v8fp", "HasV8FP",
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
"true", "Enable ARMv8 FP",
[FeatureVFP4]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
@@ -67,6 +67,11 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable support for Performance Monitor extensions">;
def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
"Enable support for TrustZone security extensions">;
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable support for Cryptography extensions",
+ [FeatureNEON]>;
+def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
+ "Enable support for CRC instructions">;
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
@@ -114,10 +119,24 @@ def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
-// M-series ISA?
-def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
+// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8).
+def FeatureVirtualization : SubtargetFeature<"virtualization",
+ "HasVirtualization", "true",
+ "Supports Virtualization extension",
+ [FeatureHWDiv, FeatureHWDivARM]>;
+
+// M-series ISA
+def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
"Is microcontroller profile ('M' series)">;
+// R-series ISA
+def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
+ "Is realtime profile ('R' series)">;
+
+// A-series ISA
+def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
+ "Is application profile ('A' series)">;
+
// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
// See ARMInstrInfo.td for details.
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
@@ -135,15 +154,19 @@ def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
"Support ARM v6 instructions",
[HasV5TEOps]>;
+def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
+ "Support ARM v6M instructions",
+ [HasV6Ops]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6Ops, FeatureThumb2]>;
+ [HasV6MOps, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops, FeaturePerfMon]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops]>;
+ [HasV7Ops, FeatureVirtualization,
+ FeatureMP]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -179,9 +202,23 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
// FIXME: It has not been determined if A15 has these features.
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
- [FeatureT2XtPk, FeatureFP16, FeatureVFP4,
+ [FeatureT2XtPk, FeatureVFP4,
+ FeatureMP, FeatureHWDiv, FeatureHWDivARM,
FeatureAvoidPartialCPSR,
- FeatureTrustZone]>;
+ FeatureTrustZone, FeatureVirtualization]>;
+
+def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+ "Cortex-A53 ARM processors",
+ [FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto, FeatureCRC]>;
+
+def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+ "Cortex-A57 ARM processors",
+ [FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto, FeatureCRC]>;
+
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
[FeatureSlowFPBrcc,
@@ -243,7 +280,7 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
// V6M Processors.
-def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
FeatureDB, FeatureMClass]>;
// V6T2 Processors.
@@ -258,26 +295,30 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
def : ProcessorModel<"cortex-a5", CortexA8Model,
[ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
FeatureVFP4, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
def : ProcessorModel<"cortex-a8", CortexA8Model,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
def : ProcessorModel<"cortex-a9", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
// FIXME: A15 has currently the same ProcessorModel as A9.
def : ProcessorModel<"cortex-a15", CortexA9Model,
[ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model,
[ProcR5, HasV7Ops, FeatureDB,
FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureVFPOnlySP,
+ FeatureD16, FeatureRClass]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
@@ -289,16 +330,22 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
FeatureT2XtPk, FeatureVFP4,
- FeatureVFPOnlySP, FeatureMClass]>;
+ FeatureVFPOnlySP, FeatureD16,
+ FeatureMClass]>;
// Swift uArch Processors.
def : ProcessorModel<"swift", SwiftModel,
[ProcSwift, HasV7Ops, FeatureNEON,
FeatureDB, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
// V8 Processors
-def : ProcNoItin<"cortex-a53", [HasV8Ops]>;
+def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2]>;
+def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 13a22b1..e79f88d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -17,6 +17,7 @@
#include "ARM.h"
#include "ARMBuildAttrs.h"
#include "ARMConstantPoolValue.h"
+#include "ARMFPUName.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
@@ -55,164 +56,6 @@
#include <cctype>
using namespace llvm;
-namespace {
-
- // Per section and per symbol attributes are not supported.
- // To implement them we would need the ability to delay this emission
- // until the assembly file is fully parsed/generated as only then do we
- // know the symbol and section numbers.
- class AttributeEmitter {
- public:
- virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
- virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
- virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
- virtual void Finish() = 0;
- virtual ~AttributeEmitter() {}
- };
-
- class AsmAttributeEmitter : public AttributeEmitter {
- MCStreamer &Streamer;
-
- public:
- AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
- void MaybeSwitchVendor(StringRef Vendor) { }
-
- void EmitAttribute(unsigned Attribute, unsigned Value) {
- Streamer.EmitRawText("\t.eabi_attribute " +
- Twine(Attribute) + ", " + Twine(Value));
- }
-
- void EmitTextAttribute(unsigned Attribute, StringRef String) {
- switch (Attribute) {
- default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
- case ARMBuildAttrs::CPU_name:
- Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
- break;
- /* GAS requires .fpu to be emitted regardless of EABI attribute */
- case ARMBuildAttrs::Advanced_SIMD_arch:
- case ARMBuildAttrs::VFP_arch:
- Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
- break;
- }
- }
- void Finish() { }
- };
-
- class ObjectAttributeEmitter : public AttributeEmitter {
- // This structure holds all attributes, accounting for
- // their string/numeric value, so we can later emmit them
- // in declaration order, keeping all in the same vector
- struct AttributeItemType {
- enum {
- HiddenAttribute = 0,
- NumericAttribute,
- TextAttribute
- } Type;
- unsigned Tag;
- unsigned IntValue;
- StringRef StringValue;
- } AttributeItem;
-
- MCObjectStreamer &Streamer;
- StringRef CurrentVendor;
- SmallVector<AttributeItemType, 64> Contents;
-
- // Account for the ULEB/String size of each item,
- // not just the number of items
- size_t ContentsSize;
- // FIXME: this should be in a more generic place, but
- // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
- size_t getULEBSize(int Value) {
- size_t Size = 0;
- do {
- Value >>= 7;
- Size += sizeof(int8_t); // Is this really necessary?
- } while (Value);
- return Size;
- }
-
- public:
- ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
- Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { }
-
- void MaybeSwitchVendor(StringRef Vendor) {
- assert(!Vendor.empty() && "Vendor cannot be empty.");
-
- if (CurrentVendor.empty())
- CurrentVendor = Vendor;
- else if (CurrentVendor == Vendor)
- return;
- else
- Finish();
-
- CurrentVendor = Vendor;
-
- assert(Contents.size() == 0);
- }
-
- void EmitAttribute(unsigned Attribute, unsigned Value) {
- AttributeItemType attr = {
- AttributeItemType::NumericAttribute,
- Attribute,
- Value,
- StringRef("")
- };
- ContentsSize += getULEBSize(Attribute);
- ContentsSize += getULEBSize(Value);
- Contents.push_back(attr);
- }
-
- void EmitTextAttribute(unsigned Attribute, StringRef String) {
- AttributeItemType attr = {
- AttributeItemType::TextAttribute,
- Attribute,
- 0,
- String
- };
- ContentsSize += getULEBSize(Attribute);
- // String + \0
- ContentsSize += String.size()+1;
-
- Contents.push_back(attr);
- }
-
- void Finish() {
- // Vendor size + Vendor name + '\0'
- const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
-
- // Tag + Tag Size
- const size_t TagHeaderSize = 1 + 4;
-
- Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(CurrentVendor);
- Streamer.EmitIntValue(0, 1); // '\0'
-
- Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
- Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
-
- // Size should have been accounted for already, now
- // emit each field as its type (ULEB or String)
- for (unsigned int i=0; i<Contents.size(); ++i) {
- AttributeItemType item = Contents[i];
- Streamer.EmitULEB128IntValue(item.Tag);
- switch (item.Type) {
- default: llvm_unreachable("Invalid attribute type");
- case AttributeItemType::NumericAttribute:
- Streamer.EmitULEB128IntValue(item.IntValue);
- break;
- case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper());
- Streamer.EmitIntValue(0, 1); // '\0'
- break;
- }
- }
-
- Contents.clear();
- }
- };
-
-} // end of anonymous namespace
-
/// EmitDwarfRegOp - Emit dwarf register operation.
void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc,
bool Indirect) const {
@@ -302,7 +145,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
assert(GV && "C++ constructor pointer was not a GlobalValue!");
- const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV),
(Subtarget->isTargetDarwin()
? MCSymbolRefExpr::VK_None
: MCSymbolRefExpr::VK_ARM_TARGET1),
@@ -363,7 +206,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
(TF & ARMII::MO_HI16))
O << ":upper16:";
- O << *Mang->getSymbol(GV);
+ O << *getSymbol(GV);
printOffset(MO.getOffset(), O);
if (TF == ARMII::MO_PLT)
@@ -496,6 +339,23 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!FlagsOP.isImm())
return true;
unsigned Flags = FlagsOP.getImm();
+
+ // This operand may not be the one that actually provides the register. If
+ // it's tied to a previous one then we should refer instead to that one
+ // for registers and their classes.
+ unsigned TiedIdx;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedIdx)) {
+ for (OpNum = InlineAsm::MIOp_FirstOperand; TiedIdx; --TiedIdx) {
+ unsigned OpFlags = MI->getOperand(OpNum).getImm();
+ OpNum += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+ Flags = MI->getOperand(OpNum).getImm();
+
+ // Later code expects OpNum to be pointing at the register rather than
+ // the flags.
+ OpNum += 1;
+ }
+
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
unsigned RC;
InlineAsm::hasRegClassConstraint(Flags, RC);
@@ -714,11 +574,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
- // FIXME: This should eventually end up somewhere else where more
- // intelligent flag decisions can be made. For now we are just maintaining
- // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
- if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
- MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
//===----------------------------------------------------------------------===//
@@ -728,150 +583,150 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// to appear in the .ARM.attributes section in ELF.
// Instead of subclassing the MCELFStreamer, we do the work here.
-void ARMAsmPrinter::emitAttributes() {
-
- emitARMAttributeSection();
-
- /* GAS expect .fpu to be emitted, regardless of VFP build attribute */
- bool emitFPU = false;
- AttributeEmitter *AttrEmitter;
- if (OutStreamer.hasRawTextSupport()) {
- AttrEmitter = new AsmAttributeEmitter(OutStreamer);
- emitFPU = true;
- } else {
- MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
- AttrEmitter = new ObjectAttributeEmitter(O);
- }
-
- AttrEmitter->MaybeSwitchVendor("aeabi");
-
- std::string CPUString = Subtarget->getCPUString();
+static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
+ const ARMSubtarget *Subtarget) {
+ if (CPU == "xscale")
+ return ARMBuildAttrs::v5TEJ;
- if (CPUString == "cortex-a8" ||
- Subtarget->isCortexA8()) {
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::ApplicationProfile);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
- // Fixme: figure out when this is emitted.
- //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
- // ARMBuildAttrs::AllowWMMXv1);
- //
-
- /// ADD additional Else-cases here!
- } else if (CPUString == "xscale") {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::Allowed);
- } else if (Subtarget->hasV8Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v8);
+ if (Subtarget->hasV8Ops())
+ return ARMBuildAttrs::v8;
else if (Subtarget->hasV7Ops()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
+ if (Subtarget->isMClass() && Subtarget->hasThumb2DSP())
+ return ARMBuildAttrs::v7E_M;
+ return ARMBuildAttrs::v7;
} else if (Subtarget->hasV6T2Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2);
+ return ARMBuildAttrs::v6T2;
+ else if (Subtarget->hasV6MOps())
+ return ARMBuildAttrs::v6S_M;
else if (Subtarget->hasV6Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6);
+ return ARMBuildAttrs::v6;
else if (Subtarget->hasV5TEOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE);
+ return ARMBuildAttrs::v5TE;
else if (Subtarget->hasV5TOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T);
+ return ARMBuildAttrs::v5T;
else if (Subtarget->hasV4TOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ return ARMBuildAttrs::v4T;
else
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4);
+ return ARMBuildAttrs::v4;
+}
- if (Subtarget->hasNEON() && emitFPU) {
- /* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (Subtarget->hasVFP4())
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- "neon-vfpv4");
- else
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
- /* If emitted for NEON, omit from VFP below, since you can have both
- * NEON and VFP in build attributes but only one .fpu */
- emitFPU = false;
+void ARMAsmPrinter::emitAttributes() {
+ MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+ ATS.switchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString != "generic")
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch,
+ getArchForCPU(CPUString, Subtarget));
+
+ if (Subtarget->isAClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ } else if (Subtarget->isRClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::RealTimeProfile);
+ } else if (Subtarget->isMClass()){
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::MicroControllerProfile);
}
- /* V8FP + .fpu */
- if (Subtarget->hasV8FP()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowV8FPA);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "v8fp");
- /* VFPv4 + .fpu */
- } else if (Subtarget->hasVFP4()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv4A);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
-
- /* VFPv3 + .fpu */
- } else if (Subtarget->hasVFP3()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv3A);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
-
- /* VFPv2 + .fpu */
- } else if (Subtarget->hasVFP2()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv2);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+ ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ?
+ ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed);
+ if (Subtarget->isThumb1Only()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ } else if (Subtarget->hasThumb2()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
}
- /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
- * since NEON can have 1 (allowed) or 2 (MAC operations) */
if (Subtarget->hasNEON()) {
- if (Subtarget->hasV8Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowedNeonV8);
+ /* NEON is not exactly a VFP architecture, but GAS emit one of
+ * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (Subtarget->hasFPARMv8()) {
+ if (Subtarget->hasCrypto())
+ ATS.emitFPU(ARM::CRYPTO_NEON_FP_ARMV8);
+ else
+ ATS.emitFPU(ARM::NEON_FP_ARMV8);
+ }
+ else if (Subtarget->hasVFP4())
+ ATS.emitFPU(ARM::NEON_VFPV4);
else
- AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::Allowed);
+ ATS.emitFPU(ARM::NEON);
+ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+ if (Subtarget->hasV8Ops())
+ ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeonARMv8);
+ } else {
+ if (Subtarget->hasFPARMv8())
+ ATS.emitFPU(ARM::FP_ARMV8);
+ else if (Subtarget->hasVFP4())
+ ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
+ else if (Subtarget->hasVFP3())
+ ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3);
+ else if (Subtarget->hasVFP2())
+ ATS.emitFPU(ARM::VFPV2);
}
// Signal various FP modes.
if (!TM.Options.UnsafeFPMath) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
}
if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
else
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::AllowIEE754);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
// FIXME: add more flags to ARMBuildAttrs.h
// 8-bytes alignment stuff.
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // ABI_HardFP_use attribute to indicate single precision FP.
+ if (Subtarget->isFPOnlySP())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+ ARMBuildAttrs::HardFPSinglePrecision);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
- }
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
+
// FIXME: Should we signal R9 usage?
- if (Subtarget->hasDivide())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+ if (Subtarget->hasFP16())
+ ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ if (Subtarget->hasMPExtension())
+ ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+ if (Subtarget->hasDivide()) {
+ // Check if hardware divide is only available in thumb2 or ARM as well.
+ ATS.emitAttribute(ARMBuildAttrs::DIV_use,
+ Subtarget->hasDivideInARMMode() ? ARMBuildAttrs::AllowDIVExt :
+ ARMBuildAttrs::AllowDIVIfExists);
+ }
- AttrEmitter->Finish();
- delete AttrEmitter;
+ if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (Subtarget->hasTrustZone())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZ);
+ else if (Subtarget->hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
+
+ ATS.finishAttributeSection();
}
void ARMAsmPrinter::emitARMAttributeSection() {
@@ -923,7 +778,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
bool isIndirect = Subtarget->isTargetDarwin() &&
Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
if (!isIndirect)
- return Mang->getSymbol(GV);
+ return getSymbol(GV);
// FIXME: Remove this when Darwin transition to @GOT like syntax.
MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
@@ -934,7 +789,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
MMIMachO.getGVStubEntry(MCSym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
return MCSym;
}
@@ -1111,6 +966,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
+ MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
@@ -1173,7 +1030,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
RegList.push_back(SrcReg);
break;
}
- OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
} else {
// Changes of stack / frame pointer.
if (SrcReg == ARM::SP) {
@@ -1221,11 +1078,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
if (DstReg == FramePtr && FramePtr != ARM::SP)
// Set-up of the frame pointer. Positive values correspond to "add"
// instruction.
- OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+ ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
else if (DstReg == ARM::SP) {
// Change of SP by an offset. Positive values correspond to "sub"
// instruction.
- OutStreamer.EmitPad(Offset);
+ ATS.emitPad(Offset);
} else {
MI->dump();
llvm_unreachable("Unsupported opcode for unwinding information");
@@ -1366,7 +1223,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
+ MCSymbol *GVSym = getSymbol(GV);
const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
.addExpr(GVSymExpr)
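
The rewritten emitAttributes above picks a single .fpu name from the subtarget's FP features before emitting the remaining build attributes. A condensed restatement of that selection order (a sketch only; the string spellings are illustrative stand-ins for the ARM::* identifiers passed to ATS.emitFPU):

    static const char *pickFPUName(bool HasNEON, bool HasFPARMv8, bool HasCrypto,
                                   bool HasVFP4, bool HasVFP3, bool HasVFP2,
                                   bool HasD16) {
      if (HasNEON) {
        if (HasFPARMv8)
          return HasCrypto ? "crypto-neon-fp-armv8" : "neon-fp-armv8";
        return HasVFP4 ? "neon-vfpv4" : "neon";
      }
      if (HasFPARMv8) return "fp-armv8";
      if (HasVFP4)    return HasD16 ? "vfpv4-d16" : "vfpv4";
      if (HasVFP3)    return HasD16 ? "vfpv3-d16" : "vfpv3";
      if (HasVFP2)    return "vfpv2";
      return 0; // no FP unit: no .fpu directive is emitted
    }
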
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 977d936..f835a4e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMBaseInstrInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
+#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -36,7 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
using namespace llvm;
@@ -520,11 +521,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
if (!MI->isPredicable())
return false;
- if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
- ARMFunctionInfo *AFI =
- MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
- return AFI->isThumb2Function();
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+
+ if (AFI->isThumb2Function()) {
+ if (getSubtarget().restrictIT())
+ return isV8EligibleForIT(MI);
+ } else { // non-Thumb
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
}
+
return true;
}
@@ -645,16 +652,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
- bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))));
+ .addReg(SrcReg, getKillRegState(KillSrc))));
return;
}
bool SPRDest = ARM::SPRRegClass.contains(DestReg);
- bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
unsigned Opc = 0;
if (SPRDest && SPRSrc)
@@ -683,26 +690,47 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
int Spacing = 1;
// Use VORRq when possible.
- if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
- else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 2;
+ } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 4;
// Fall back to VMOVD.
- else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
- else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
- else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
- else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
- Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
-
- else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
- else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
- else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+ } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
+ BeginIdx = ARM::gsub_0;
+ SubRegs = 2;
+ } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ Spacing = 2;
+ } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ Spacing = 2;
+ } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ Spacing = 2;
+ }
assert(Opc && "Impossible reg-to-reg copy");
@@ -711,22 +739,21 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy register tuples backward when the first Dest reg overlaps with SrcReg.
if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
- BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+ BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
Spacing = -Spacing;
}
#ifndef NDEBUG
SmallSet<unsigned, 4> DstRegs;
#endif
for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
assert(!DstRegs.count(Src) && "destructive vector copy");
DstRegs.insert(Dst);
#endif
- Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
- .addReg(Src);
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
@@ -1404,9 +1431,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@@ -1423,8 +1452,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@@ -1471,7 +1502,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
if ((Offset2 - Offset1) / 8 > 64)
return false;
- if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+  // Check if the machine opcodes are different. If they are different, we
+  // treat the loads as having different base addresses, EXCEPT for Thumb2
+  // byte loads where one is t2LDRBi8 and the other t2LDRBi12: those are
+  // considered the same because they are just different encoding forms of
+  // the same basic instruction.

+ if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
+ !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
+ (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi8)))
return false; // FIXME: overly conservative?
// Four loads in a row should be sufficient.
@@ -1686,7 +1726,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
bool PreferFalse) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
@@ -1694,11 +1734,17 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (!DefMI)
return 0;
+ // Find new register class to use.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ unsigned DestReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ return 0;
+
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- DefMI->getDesc(),
- MI->getOperand(0).getReg());
+ DefMI->getDesc(), DestReg);
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1721,7 +1767,6 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
- MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
FalseReg.setImplicit();
NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
@@ -1781,6 +1826,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ return;
+ }
+
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
@@ -1804,6 +1857,115 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
}
}
+bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
+ MachineInstr *MI,
+ unsigned NumBytes) {
+  // This optimisation potentially adds lots of load and store
+  // micro-operations, so its only real benefit is to code size.
+ if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+ return false;
+
+ // If only one register is pushed/popped, LLVM can use an LDR/STR
+ // instead. We can't modify those so make sure we're dealing with an
+ // instruction we understand.
+ bool IsPop = isPopOpcode(MI->getOpcode());
+ bool IsPush = isPushOpcode(MI->getOpcode());
+ if (!IsPush && !IsPop)
+ return false;
+
+ bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD;
+ bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
+ MI->getOpcode() == ARM::tPOP ||
+ MI->getOpcode() == ARM::tPOP_RET;
+
+ assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getOperand(1).getReg() == ARM::SP)) &&
+ "trying to fold sp update into non-sp-updating push/pop");
+
+  // The VFP push & pop act on D-registers, so we can only correctly fold in
+  // an adjustment that is a multiple of 8 bytes. Similarly, each rN slot is
+  // 4 bytes. Don't attempt the fold if this is violated.
+ if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
+ return false;
+
+ // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
+ // pred) so the list starts at 4. Thumb1 starts after the predicate.
+ int RegListIdx = IsT1PushPop ? 2 : 4;
+
+ // Calculate the space we'll need in terms of registers.
+ unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
+ unsigned RD0Reg, RegsNeeded;
+ if (IsVFPPushPop) {
+ RD0Reg = ARM::D0;
+ RegsNeeded = NumBytes / 8;
+ } else {
+ RD0Reg = ARM::R0;
+ RegsNeeded = NumBytes / 4;
+ }
+
+ // We're going to have to strip all list operands off before
+ // re-adding them since the order matters, so save the existing ones
+ // for later.
+ SmallVector<MachineOperand, 4> RegList;
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ RegList.push_back(MI->getOperand(i));
+
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+
+ // Now try to find enough space in the reglist to allocate NumBytes.
+ for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
+ --CurReg) {
+ if (!IsPop) {
+ // Pushing any register is completely harmless, mark the
+ // register involved as undef since we don't care about it in
+ // the slightest.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
+ false, false, true));
+ --RegsNeeded;
+ continue;
+ }
+
+ // However, we can only pop an extra register if it's not live. For
+ // registers live within the function we might clobber a return value
+ // register; the other way a register can be live here is if it's
+ // callee-saved.
+ if (isCalleeSavedRegister(CurReg, CSRegs) ||
+ MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
+ MachineBasicBlock::LQR_Dead) {
+ // VFP pops don't allow holes in the register list, so any skip is fatal
+ // for our transformation. GPR pops do, so we should just keep looking.
+ if (IsVFPPushPop)
+ return false;
+ else
+ continue;
+ }
+
+ // Mark the unimportant registers as <def,dead> in the POP.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
+ true));
+ --RegsNeeded;
+ }
+
+ if (RegsNeeded > 0)
+ return false;
+
+ // Finally we know we can profitably perform the optimisation so go
+ // ahead: strip all existing registers off and add them back again
+ // in the right order.
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ MI->RemoveOperand(i);
+
+ // Add the complete list back in.
+ MachineInstrBuilder MIB(MF, &*MI);
+ for (int i = RegList.size() - 1; i >= 0; --i)
+ MIB.addOperand(RegList[i]);
+
+ return true;
+}
+
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
@@ -2210,8 +2372,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
isSafe = true;
break;
}
- // Condition code is after the operand before CPSR.
- ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
+
if (Sub) {
ARMCC::CondCodes NewCC = getSwappedCondition(CC);
if (NewCC == ARMCC::AL)
@@ -2222,11 +2408,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// If it is safe to remove CmpInstr, the condition code of these
// operands will be modified.
if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg)
- OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
- NewCC));
- }
- else
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
+ return false;
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
+ }
+ } else
switch (CC) {
default:
// CPSR can be used multiple times, we should continue.
@@ -3582,6 +3771,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 0;
+
+ if (MI->isBundle())
+ return 0;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
+ // When predicated, CPSR is an additional source operand for CPSR updating
+ // instructions, this apparently increases their latencies.
+ return 1;
+ }
+ return 0;
+}
+
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -4114,7 +4321,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
// FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
// the full D-register by loading the same value to both lanes. The
// instruction is micro-coded with 2 uops, so don't do this until we can
- // properly schedule micro-coded instuctions. The dispatcher stalls cause
+ // properly schedule micro-coded instructions. The dispatcher stalls cause
// too big regressions.
// Insert the dependency-breaking FCONSTD before MI.
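
tryFoldSPUpdateIntoPushPop above absorbs a stack adjustment into an adjacent push/pop by growing its register list, and its comments carry the whole algorithm. The size precondition and register count reduce to a couple of lines; a standalone sketch of just that arithmetic (illustrative, not part of the patch):

    // How many extra list entries a push/pop needs to absorb an SP adjustment
    // of NumBytes, or -1 if the adjustment cannot be folded at all. VFP
    // push/pop move D-registers (8 bytes each); core-register lists move
    // 4 bytes per entry.
    static int extraRegsForSPAdjust(unsigned NumBytes, bool IsVFPPushPop) {
      unsigned Slot = IsVFPPushPop ? 8 : 4;
      if (NumBytes % Slot != 0)
        return -1;
      return static_cast<int>(NumBytes / Slot);
    }

    // e.g. folding "sub sp, sp, #8" into a GPR push needs two spare registers:
    //   extraRegsForSPAdjust(8, /*IsVFPPushPop=*/false) == 2
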
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 96f8637..93e5964 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -264,6 +264,8 @@ private:
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
+ unsigned getPredicationCost(const MachineInstr *MI) const;
+
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost = 0) const;
@@ -360,6 +362,17 @@ bool isIndirectBranchOpcode(int Opc) {
return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
}
+static inline bool isPopOpcode(int Opc) {
+ return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET ||
+ Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD ||
+ Opc == ARM::t2LDMIA_UPD || Opc == ARM::VLDMDIA_UPD;
+}
+
+static inline bool isPushOpcode(int Opc) {
+ return Opc == ARM::tPUSH || Opc == ARM::t2STMDB_UPD ||
+ Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -399,6 +412,13 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
const ARMBaseRegisterInfo& MRI,
unsigned MIFlags = 0);
+/// Tries to add registers to the reglist of a given base-updating
+/// push/pop instruction to adjust the stack by an additional
+/// NumBytes. This can save a few bytes per function in code-size, but
+/// obviously generates more memory traffic. As such, it only takes
+/// effect in functions being optimised for size.
+bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI,
+ unsigned NumBytes);
/// rewriteARMFrameIndex / rewriteT2FrameIndex -
/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 58c06e3..8717dc0 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -51,20 +51,34 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool ghcCall = false;
-
- if (MF) {
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- }
-
- if (ghcCall)
+ const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_SaveList
+ : CSR_AAPCS_SaveList;
+
+ if (!MF) return RegList;
+
+ const Function *F = MF->getFunction();
+ if (F->getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
- else
- return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ } else if (F->hasFnAttribute("interrupt")) {
+ if (STI.isMClass()) {
+ // M-class CPUs have hardware which saves the registers needed to allow a
+ // function conforming to the AAPCS to function as a handler.
+ return CSR_AAPCS_SaveList;
+ } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") {
+      // Fast interrupt mode gives the handler a private copy of R8-R14, so
+      // fewer registers need to be saved to restore user-mode state.
+ return CSR_FIQ_SaveList;
+ } else {
+ // Generally only R13-R14 (i.e. SP, LR) are automatically preserved by
+ // exception handling.
+ return CSR_GenericInt_SaveList;
+ }
+ }
+
+ return RegList;
}
const uint32_t*
@@ -371,14 +385,6 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return ARM::SP;
}
-unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
-
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void ARMBaseRegisterInfo::
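
getCalleeSavedRegs above now chooses between several save lists depending on the calling convention and the new "interrupt" attribute. A compact restatement of that decision (a sketch; the enumerators stand in for the CSR_* lists defined in ARMCallingConv.td):

    enum SaveList { SL_NoRegs, SL_AAPCS, SL_iOS, SL_FIQ, SL_GenericInt };

    static SaveList pickSaveList(bool IsGHC, bool HasInterruptAttr,
                                 bool IsMClass, bool IsFIQ,
                                 bool IsIOSNonAAPCS) {
      if (IsGHC)
        return SL_NoRegs;                    // all registers carry STG state
      if (HasInterruptAttr) {
        if (IsMClass) return SL_AAPCS;       // hardware saves the rest
        if (IsFIQ)    return SL_FIQ;         // banked R8-R14 in FIQ mode
        return SL_GenericInt;                // only SP/LR are banked
      }
      return IsIOSNonAAPCS ? SL_iOS : SL_AAPCS;
    }
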
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index cdaad05..e28fff6 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -72,6 +72,14 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
}
}
+static inline bool isCalleeSavedRegister(unsigned Reg,
+ const MCPhysReg *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
const ARMSubtarget &STI;
@@ -149,10 +157,6 @@ public:
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getBaseRegister() const { return BasePtr; }
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
bool isLowRegister(unsigned Reg) const;
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
index f614dca..b16d4ef 100644
--- a/lib/Target/ARM/ARMBuildAttrs.h
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -15,11 +15,13 @@
#ifndef __TARGET_ARMBUILDATTRS_H__
#define __TARGET_ARMBUILDATTRS_H__
+namespace llvm {
namespace ARMBuildAttrs {
+
enum SpecialAttr {
// This is for the .cpu asm attr. It translates into one or more
// AttrType (below) entries in the .ARM.attributes section in the ELF.
- SEL_CPU
+ SEL_CPU
};
enum AttrType {
@@ -57,7 +59,7 @@ namespace ARMBuildAttrs {
ABI_FP_optimization_goals = 31,
compatibility = 32,
CPU_unaligned_access = 34,
- VFP_HP_extension = 36,
+ FP_HP_extension = 36,
ABI_FP_16bit_format = 38,
MPextension_use = 42, // was 70, 2.08 ABI
DIV_use = 44,
@@ -93,7 +95,7 @@ namespace ARMBuildAttrs {
v8 = 14 // v8, AArch32
};
- enum CPUArchProfile { // (=7), uleb128
+ enum CPUArchProfile { // (=7), uleb128
Not_Applicable = 0, // pre v7, or cross-profile code
ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
@@ -102,34 +104,67 @@ namespace ARMBuildAttrs {
};
// The following have a lot of common use cases
- enum {
- //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128
+ enum {
Not_Allowed = 0,
Allowed = 1,
- AllowedNeonV8 = 3,
- // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ // Tag_ARM_ISA_use (=8), uleb128
+
+ // Tag_THUMB_ISA_use, (=9), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
- AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
- AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
- AllowV8FPA = 7, // Use of the ARM v8-A FP ISA was permitted
- AllowV8FPB = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31
+ AllowFPARMv8A = 7, // Use of the ARM v8-A FP ISA was permitted
+ AllowFPARMv8B = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31
// Tag_WMMX_arch, (=11), uleb128
- AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
-
- // Tag_WMMX_arch, (=11), uleb128
- AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2
+ AllowWMMXv1 = 1, // The user permitted this entity to use WMMX v1
+ AllowWMMXv2 = 2, // The user permitted this entity to use WMMX v2
+
+ // Tag_Advanced_SIMD_arch, (=12), uleb128
+ AllowNeon = 1, // SIMDv1 was permitted
+ AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations)
+ AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted
- // Tag_ABI_FP_denormal, (=20), uleb128
+ // Tag_ABI_FP_denormal, (=20), uleb128
PreserveFPSign = 2, // sign when flushed-to-zero is preserved
// Tag_ABI_FP_number_model, (=23), uleb128
AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
- AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings
+ AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings
+
+ // Tag_ABI_HardFP_use, (=27), uleb128
+ HardFPImplied = 0, // FP use should be implied by Tag_FP_arch
+ HardFPSinglePrecision = 1, // Single-precision only
+
+ // Tag_ABI_VFP_args, (=28), uleb128
+ BaseAAPCS = 0,
+ HardFPAAPCS = 1,
+
+ // Tag_FP_HP_extension, (=36), uleb128
+ AllowHPFP = 1, // Allow use of Half Precision FP
+
+ // Tag_MPextension_use, (=42), uleb128
+ AllowMP = 1, // Allow use of MP extensions
+
+ // Tag_DIV_use, (=44), uleb128
+ AllowDIVIfExists = 0, // Allow hardware divide if available in arch, or no info exists.
+ DisallowDIV = 1, // Hardware divide explicitly disallowed
+ AllowDIVExt = 2, // Allow hardware divide as optional architecture extension above
+ // the base arch specified by Tag_CPU_arch and Tag_CPU_arch_profile.
+
+ // Tag_Virtualization_use, (=68), uleb128
+ AllowTZ = 1,
+ AllowVirtualization = 2,
+ AllowTZVirtualization = 3
};
-}
+
+} // namespace ARMBuildAttrs
+} // namespace llvm
#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 89c5223..9bea4b2 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -207,4 +207,24 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
- (sub CSR_AAPCS_ThisReturn, R9))>;
+ (sub CSR_AAPCS_ThisReturn, R9))>;
+
+// The "interrupt" attribute is used to generate code that is acceptable in
+// exception-handlers of various kinds. It makes us use a different return
+// instruction (handled elsewhere) and affects which registers we must return to
+// our "caller" in the same state as we receive them.
+
+// For most interrupts, all registers except SP and LR are shared with
+// user-space. We mark LR to be saved anyway, since this is what the ARM backend
+// generally does rather than tracking its liveness as a normal register.
+def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>;
+
+// The fast interrupt handlers have more private state and get their own copies
+// of R8-R12, in addition to SP and LR. As before, mark LR for saving too.
+
+// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and
+// current frame lowering expects to encounter it while processing callee-saved
+// registers.
+def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>;
+
+
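For context on the CSR lists above, a minimal source-level sketch of how they would come into play, assuming Clang's ARM "interrupt" function attribute (the handler name below is hypothetical and not part of this patch):

/* Marking a handler this way is what would make the backend pick
   CSR_GenericInt (or CSR_FIQ for "FIQ") rather than the usual AAPCS
   callee-saved list, and return with an exception-return sequence. */
__attribute__((interrupt("IRQ"))) void irq_handler(void) {
  /* R0-R12 must appear untouched to the interrupted code on return */
}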
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 4e703ec..7d41c69 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -163,21 +163,7 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV);
- if (!APC) continue;
- if (APC->CVal == CVal && equals(APC))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment);
}
bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
@@ -216,22 +202,7 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
- if (!APS) continue;
-
- if (APS->S == S && equals(APS))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolSymbol>(CP, Alignment);
}
bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
@@ -271,22 +242,7 @@ ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV);
- if (!APMBB) continue;
-
- if (APMBB->MBB == MBB && equals(APMBB))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolMBB>(CP, Alignment);
}
bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 93812fe..7ae7bf4 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstddef>
@@ -64,6 +65,26 @@ protected:
ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
+
+ template <typename Derived>
+ int getExistingMachineCPValueImpl(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (Derived *APC = dyn_cast<Derived>(CPV))
+ if (cast<Derived>(this)->equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+ }
+
public:
virtual ~ARMConstantPoolValue();
@@ -156,6 +177,10 @@ public:
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
}
+
+ bool equals(const ARMConstantPoolConstant *A) const {
+ return CVal == A->CVal && ARMConstantPoolValue::equals(A);
+ }
};
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
@@ -187,6 +212,10 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isExtSymbol();
}
+
+ bool equals(const ARMConstantPoolSymbol *A) const {
+ return S == A->S && ARMConstantPoolValue::equals(A);
+ }
};
/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
@@ -219,6 +248,10 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isMachineBasicBlock();
}
+
+ bool equals(const ARMConstantPoolMBB *A) const {
+ return MBB == A->MBB && ARMConstantPoolValue::equals(A);
+ }
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index beb843c..e6f7f86 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -692,10 +692,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg());
+ .addOperand(MI.getOperand(4));
MI.eraseFromParent();
return true;
@@ -705,10 +704,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
@@ -717,39 +715,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsi: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
(MI.getOperand(1).getReg()))
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm())
.addImm(MI.getOperand(4).getImm()) // 'pred'
- .addReg(MI.getOperand(5).getReg())
+ .addOperand(MI.getOperand(5))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
-
case ARM::MOVCCsr: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
- .addReg(MI.getOperand(3).getReg(),
- getKillRegState(MI.getOperand(3).isKill()))
+ .addOperand(MI.getOperand(2))
+ .addOperand(MI.getOperand(3))
.addImm(MI.getOperand(4).getImm())
.addImm(MI.getOperand(5).getImm()) // 'pred'
- .addReg(MI.getOperand(6).getReg())
+ .addOperand(MI.getOperand(6))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MOVCCi16:
case ARM::MOVCCi16: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+ unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg());
-
+ .addOperand(MI.getOperand(4));
MI.eraseFromParent();
return true;
}
@@ -760,23 +755,47 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MVNCCi:
case ARM::MVNCCi: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MOVCClsl:
+ case ARM::t2MOVCClsr:
+ case ARM::t2MOVCCasr:
+ case ARM::t2MOVCCror: {
+ unsigned NewOpc;
+ switch (Opcode) {
+ case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
+ case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
+ case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
+ case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
+ default: llvm_unreachable("unexpeced conditional move");
+ }
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
+ MI.getOperand(1).getReg())
+ .addOperand(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addOperand(MI.getOperand(5))
+ .addReg(0); // 's' bit
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
@@ -823,7 +842,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
- // These are just fancy MOVs insructions.
+ // These are just fancy MOVs instructions.
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
@@ -938,6 +957,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandMOV32BitImm(MBB, MBBI);
return true;
+ case ARM::SUBS_PC_LR: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
+ .addReg(ARM::LR)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(2))
+ .addReg(ARM::CPSR, RegState::Undef);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::VLDMQIA: {
unsigned NewOpc = ARM::VLDMDIA;
MachineInstrBuilder MIB =
diff --git a/lib/Target/ARM/ARMFPUName.def b/lib/Target/ARM/ARMFPUName.def
new file mode 100644
index 0000000..9a1bbe7
--- /dev/null
+++ b/lib/Target/ARM/ARMFPUName.def
@@ -0,0 +1,32 @@
+//===-- ARMFPUName.def - List of the ARM FPU names --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the list of the supported ARM FPU names.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef ARM_FPU_NAME
+#error "You must define ARM_FPU_NAME(NAME, ID) before including ARMFPUName.h"
+#endif
+
+ARM_FPU_NAME("vfp", VFP)
+ARM_FPU_NAME("vfpv2", VFPV2)
+ARM_FPU_NAME("vfpv3", VFPV3)
+ARM_FPU_NAME("vfpv3-d16", VFPV3_D16)
+ARM_FPU_NAME("vfpv4", VFPV4)
+ARM_FPU_NAME("vfpv4-d16", VFPV4_D16)
+ARM_FPU_NAME("fp-armv8", FP_ARMV8)
+ARM_FPU_NAME("neon", NEON)
+ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4)
+ARM_FPU_NAME("neon-fp-armv8", NEON_FP_ARMV8)
+ARM_FPU_NAME("crypto-neon-fp-armv8", CRYPTO_NEON_FP_ARMV8)
+
+#undef ARM_FPU_NAME
diff --git a/lib/Target/ARM/ARMFPUName.h b/lib/Target/ARM/ARMFPUName.h
new file mode 100644
index 0000000..2a64cce
--- /dev/null
+++ b/lib/Target/ARM/ARMFPUName.h
@@ -0,0 +1,26 @@
+//===-- ARMFPUName.h - List of the ARM FPU names ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMFPUNAME_H
+#define ARMFPUNAME_H
+
+namespace llvm {
+namespace ARM {
+
+enum FPUKind {
+ INVALID_FPU = 0
+
+#define ARM_FPU_NAME(NAME, ID) , ID
+#include "ARMFPUName.def"
+};
+
+} // namespace ARM
+} // namespace llvm
+
+#endif // ARMFPUNAME_H
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index ed054aa..a4004f3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -176,6 +176,8 @@ class ARMFastISel : public FastISel {
// Utility routines.
private:
+ unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+ unsigned OpNum);
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -252,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
- // If we're a thumb2 or not NEON function we were handled via isPredicable.
+ // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
AFI->isThumb2Function())
- return false;
+ return MI->isPredicable();
for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
if (MCID.OpInfo[i].isPredicate())
@@ -276,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
// Do we use a predicate? or...
// Are we NEON in ARM mode and have a predicate operand? If so, I know
// we're not predicable but add it anyways.
- if (TII.isPredicable(MI) || isARMNEONPred(MI))
+ if (isARMNEONPred(MI))
AddDefaultPred(MIB);
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
@@ -291,6 +293,23 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
return MIB;
}
+unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II,
+ unsigned Op, unsigned OpNum) {
+ if (TargetRegisterInfo::isVirtualRegister(Op)) {
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+ if (!MRI.constrainRegClass(Op, RegClass)) {
+ // If it's not legal to COPY between the register classes, something
+ // has gone very wrong before we got here.
+ unsigned NewOp = createResultReg(RegClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), NewOp).addReg(Op));
+ return NewOp;
+ }
+ }
+ return Op;
+}
+
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass* RC) {
unsigned ResultReg = createResultReg(RC);
@@ -306,6 +325,9 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill));
@@ -326,6 +348,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -349,6 +376,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+ Op2 = constrainOperandRegClass(II, Op2, 3);
+
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -373,6 +406,9 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -395,6 +431,9 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -418,6 +457,10 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -610,6 +653,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
.addConstantPoolIndex(Idx));
else
// The extra immediate is for addrmode2.
+ DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::LDRcp), DestReg)
.addConstantPoolIndex(Idx)
@@ -685,6 +729,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
AddOptionalDefs(MIB);
} else {
// The extra immediate is for addrmode2.
+ DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
DestReg)
.addConstantPoolIndex(Idx)
@@ -855,13 +900,8 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
TmpOffset += CI->getSExtValue() * S;
break;
}
- if (isa<AddOperator>(Op) &&
- (!isa<Instruction>(Op) ||
- FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
- == FuncInfo.MBB) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add (in the same block) with a constant operand. Fold the
- // constant.
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
@@ -1139,6 +1179,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
(const TargetRegisterClass*)&ARM::tGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
+ SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
@@ -1210,6 +1251,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
+ SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
.addReg(SrcReg);
@@ -1333,6 +1375,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
unsigned OpReg = getRegForValue(TI->getOperand(0));
+ OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TstOpc))
.addReg(OpReg).addImm(1));
@@ -1370,6 +1413,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
.addReg(CmpReg).addImm(1));
@@ -1494,13 +1538,15 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
}
}
+ const MCInstrDesc &II = TII.get(CmpOpc);
+ SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
if (!UseImm) {
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CmpOpc))
+ SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(SrcReg1).addReg(SrcReg2));
} else {
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(SrcReg1);
// Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
@@ -1699,6 +1745,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
}
unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
+ CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
.addReg(CondReg).addImm(0));
@@ -1715,12 +1762,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
}
unsigned ResultReg = createResultReg(RC);
- if (!UseImm)
+ if (!UseImm) {
+ Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
+ Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
.addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
- else
+ } else {
+ Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
.addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1806,6 +1857,8 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
if (SrcReg2 == 0) return false;
unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
+ SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
+ SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2));
@@ -1933,7 +1986,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
!VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
return false;
} else {
- switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ switch (ArgVT.SimpleTy) {
default:
return false;
case MVT::i1:
@@ -2410,15 +2463,22 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(CallOpc));
+ unsigned char OpFlags = 0;
+
+ // Add MO_PLT for global address or external symbol in the PIC relocation
+ // model.
+ if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+
// ARM calls don't take a predicate, but tBL / tBLX do.
if(isThumb2)
AddDefaultPred(MIB);
if (UseReg)
MIB.addReg(CalleeReg);
else if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
+ MIB.addGlobalAddress(GV, 0, OpFlags);
else
- MIB.addExternalSymbol(IntrMemName, 0);
+ MIB.addExternalSymbol(IntrMemName, OpFlags);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -2731,6 +2791,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg);
if (setsCPSR)
MIB.addReg(ARM::CPSR, RegState::Define);
+ SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
if (hasS)
AddDefaultCC(MIB);
@@ -2965,12 +3026,14 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
// Load value.
if (isThumb2) {
+ DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRpci), DestReg1)
.addConstantPoolIndex(Idx));
Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
} else {
// The extra immediate is for addrmode2.
+ DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(ARM::LDRcp), DestReg1)
.addConstantPoolIndex(Idx).addImm(0));
@@ -2984,6 +3047,9 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
}
unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+ DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
+ DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
+ GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc), DestReg2)
.addReg(DestReg1)
@@ -3049,7 +3115,7 @@ bool ARMFastISel::FastLowerArguments() {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
- const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC = &ARM::rGPRRegClass;
Idx = 0;
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++Idx) {
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
new file mode 100644
index 0000000..dafc4b3
--- /dev/null
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -0,0 +1,93 @@
+//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the code shared between ARM CodeGen and ARM MC
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_FEATURES_H
+#define TARGET_ARM_FEATURES_H
+
+#include "ARM.h"
+
+namespace llvm {
+
+template<typename InstrType> // could be MachineInstr or MCInst
+inline bool isV8EligibleForIT(InstrType *Instr, int BLXOperandIndex = 0) {
+ switch (Instr->getOpcode()) {
+ default:
+ return false;
+ case ARM::tADC:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tADDrSPi:
+ case ARM::tADDrr:
+ case ARM::tAND:
+ case ARM::tASRri:
+ case ARM::tASRrr:
+ case ARM::tBIC:
+ case ARM::tCMNz:
+ case ARM::tCMPi8:
+ case ARM::tCMPr:
+ case ARM::tEOR:
+ case ARM::tLDRBi:
+ case ARM::tLDRBr:
+ case ARM::tLDRHi:
+ case ARM::tLDRHr:
+ case ARM::tLDRSB:
+ case ARM::tLDRSH:
+ case ARM::tLDRi:
+ case ARM::tLDRr:
+ case ARM::tLDRspi:
+ case ARM::tLSLri:
+ case ARM::tLSLrr:
+ case ARM::tLSRri:
+ case ARM::tLSRrr:
+ case ARM::tMOVi8:
+ case ARM::tMUL:
+ case ARM::tMVN:
+ case ARM::tORR:
+ case ARM::tROR:
+ case ARM::tRSB:
+ case ARM::tSBC:
+ case ARM::tSTRBi:
+ case ARM::tSTRBr:
+ case ARM::tSTRHi:
+ case ARM::tSTRHr:
+ case ARM::tSTRi:
+ case ARM::tSTRr:
+ case ARM::tSTRspi:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tSUBrr:
+ case ARM::tTST:
+ return true;
+// there are some "conditionally deprecated" opcodes
+ case ARM::tADDspr:
+ return Instr->getOperand(2).getReg() != ARM::PC;
+ // ADD PC, SP and BLX PC were always unpredictable,
+ // now on top of it they're deprecated
+ case ARM::tADDrSP:
+ case ARM::tBX:
+ return Instr->getOperand(0).getReg() != ARM::PC;
+ case ARM::tBLXr:
+ return Instr->getOperand(BLXOperandIndex).getReg() != ARM::PC;
+ case ARM::tADDhirr:
+ return Instr->getOperand(0).getReg() != ARM::PC &&
+ Instr->getOperand(2).getReg() != ARM::PC;
+ case ARM::tCMPhir:
+ case ARM::tMOVr:
+ return Instr->getOperand(0).getReg() != ARM::PC &&
+ Instr->getOperand(1).getReg() != ARM::PC;
+ }
+}
+
+}
+
+#endif
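A minimal sketch of how isV8EligibleForIT might be queried (the surrounding variables are assumed, not from this patch); both MachineInstr and MCInst callers can use it thanks to the template parameter:

// Hypothetical call site: skip predicating MI inside an IT block when
// targeting ARMv8, where the wider IT forms are deprecated.
if (Subtarget->hasV8Ops() && !isV8EligibleForIT(&MI))
  return false; // would form a deprecated IT block on v8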
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index c8637be..d32bdbc 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -82,22 +82,11 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
const uint16_t *CSRegs) {
// Integer spill area is handled with "pop".
- if (MI->getOpcode() == ARM::LDMIA_RET ||
- MI->getOpcode() == ARM::t2LDMIA_RET ||
- MI->getOpcode() == ARM::LDMIA_UPD ||
- MI->getOpcode() == ARM::t2LDMIA_UPD ||
- MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ if (isPopOpcode(MI->getOpcode())) {
// The first two operands are predicates. The last two are
// imp-def and imp-use of SP. Check everything in between.
for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
@@ -115,20 +104,31 @@ static bool isCSRestore(MachineInstr *MI,
return false;
}
-static void
-emitSPUpdate(bool isARM,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ const ARMBaseInstrInfo &TII, unsigned DestReg,
+ unsigned SrcReg, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) {
if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
Pred, PredReg, TII, MIFlags);
else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
Pred, PredReg, TII, MIFlags);
}
+static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ const ARMBaseInstrInfo &TII, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) {
+ emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
+ MIFlags, Pred, PredReg);
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -175,6 +175,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Reg = CSI[i].getReg();
int FI = CSI[i].getFrameIdx();
switch (Reg) {
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
@@ -182,73 +186,61 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
GPRCS1Size += 4;
break;
case ARM::R8:
case ARM::R9:
case ARM::R10:
case ARM::R11:
+ case ARM::R12:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetIOS()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
+ if (STI.isTargetIOS())
GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
+ else
GPRCS1Size += 4;
- }
break;
default:
// This is a DPR. Exclude the aligned DPRCS2 spills.
if (Reg == ARM::D8)
D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
- AFI->addDPRCalleeSavedAreaFrame(FI);
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
DPRCSSize += 8;
- }
}
}
// Move past area 1.
- if (GPRCS1Size > 0) MBBI++;
-
- // Set FP to point to the stack slot that contains the previous FP.
- // For iOS, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not iOS, all the callee-saved registers go
- // into spill area 1, including the FP in R11. In either case, it is
- // now safe to emit this assignment.
- bool HasFP = hasFP(MF);
- if (HasFP) {
- unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
- AddDefaultCC(AddDefaultPred(MIB));
- }
-
- // Move past area 2.
- if (GPRCS2Size > 0) MBBI++;
+ MachineBasicBlock::iterator LastPush = MBB.end(), FramePtrPush;
+ if (GPRCS1Size > 0)
+ FramePtrPush = LastPush = MBBI++;
// Determine starting offsets of spill areas.
+ bool HasFP = hasFP(MF);
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (HasFP)
+ int FramePtrOffsetInPush = 0;
+ if (HasFP) {
+ FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
+ }
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ // Move past area 2.
+ if (GPRCS2Size > 0) {
+ LastPush = MBBI++;
+ }
+
// Move past area 3.
if (DPRCSSize > 0) {
- MBBI++;
+ LastPush = MBBI++;
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
- MBBI++;
+ LastPush = MBBI++;
}
// Move past the aligned DPRCS2 area.
@@ -264,8 +256,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
- MachineInstr::FrameSetup);
+ if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) {
+ if (LastPush == FramePtrPush)
+ FramePtrOffsetInPush += NumBytes;
+ } else
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+
if (HasFP && isARM)
// Restore from fp only in ARM mode: e.g. sub sp, r7, #24
// Note it's not safe to do this in Thumb2 mode because it would have
@@ -278,6 +275,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it
+ // is in area one and the adjustment needs to take place just after
+ // that push.
+ if (HasFP)
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, ++FramePtrPush, dl, TII,
+ FramePtr, ARM::SP, FramePtrOffsetInPush,
+ MachineInstr::FrameSetup);
+
+
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
@@ -373,11 +382,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (MBBI != MBB.begin()) {
- do
+ do {
--MBBI;
- while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
if (!isCSRestore(MBBI, TII, CSRegs))
++MBBI;
}
@@ -421,8 +430,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
ARM::SP)
.addReg(FramePtr));
}
- } else if (NumBytes)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
if (AFI->getDPRCalleeSavedAreaSize()) {
@@ -501,12 +510,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
FrameReg = ARM::SP;
Offset += SPAdj;
- if (AFI->isGPRCalleeSavedArea1Frame(FI))
- return Offset - AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FI))
- return Offset - AFI->getGPRCalleeSavedArea2Offset();
- else if (AFI->isDPRCalleeSavedAreaFrame(FI))
- return Offset - AFI->getDPRCalleeSavedAreaOffset();
// SP can move around if there are allocas. We may also lose track of SP
// when emergency spilling inside a non-reserved call frame setup.
@@ -658,6 +661,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
unsigned RetOpcode = MI->getOpcode();
bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
RetOpcode == ARM::TCRETURNri);
+ bool isInterrupt =
+ RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -672,7 +677,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
continue;
- if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
+ STI.hasV5TOps()) {
Reg = ARM::PC;
LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
// Fold the return instruction into the LDM.
@@ -1199,7 +1205,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@@ -1226,6 +1232,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
case ARM::LR:
LRSpilled = true;
// Fallthrough
+ case ARM::R0: case ARM::R1:
+ case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
case ARM::R6: case ARM::R7:
CS1Spilled = true;
@@ -1240,6 +1248,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
switch (Reg) {
+ case ARM::R0: case ARM::R1:
+ case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
case ARM::R6: case ARM::R7:
case ARM::LR:
@@ -1295,8 +1305,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (!LRSpilled && CS1Spilled) {
MRI.setPhysRegUsed(ARM::LR);
NumGPRSpills++;
- UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
- UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ SmallVectorImpl<unsigned>::iterator LRPos;
+ LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
+ (unsigned)ARM::LR);
+ if (LRPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(LRPos);
+
ForceLRSpill = false;
ExtraCSSpill = true;
}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 4ca3af6..87d1522 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -130,6 +130,13 @@ public:
return true;
}
+ bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
+ const ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ Pred = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
+ return true;
+ }
+
bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
@@ -239,21 +246,6 @@ private:
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
- /// SelectCMOVOp - Select CMOV instructions for ARM.
- SDNode *SelectCMOVOp(SDNode *N);
- SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
-
// Select special operations if node forms integer ABS pattern
SDNode *SelectABSOp(SDNode *N);
@@ -261,7 +253,7 @@ private:
SDNode *SelectConcatVector(SDNode *N);
- SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -2321,204 +2313,6 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
return NULL;
}
-SDNode *ARMDAGToDAGISel::
-SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
- unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
- unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
- unsigned Opc = 0;
- switch (SOShOp) {
- case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
- case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
- case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
- case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
- default:
- llvm_unreachable("Unknown so_reg opcode!");
- }
- SDValue SOShImm =
- CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
- }
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
- }
-
- if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
- }
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (!T)
- return 0;
-
- unsigned Opc = 0;
- unsigned TrueImm = T->getZExtValue();
- if (is_t2_so_imm(TrueImm)) {
- Opc = ARM::t2MOVCCi;
- } else if (TrueImm <= 0xffff) {
- Opc = ARM::t2MOVCCi16;
- } else if (is_t2_so_imm_not(TrueImm)) {
- TrueImm = ~TrueImm;
- Opc = ARM::t2MVNCCi;
- } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
- // Large immediate.
- Opc = ARM::t2MOVCCi32imm;
- }
-
- if (Opc) {
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
- }
-
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (!T)
- return 0;
-
- unsigned Opc = 0;
- unsigned TrueImm = T->getZExtValue();
- bool isSoImm = is_so_imm(TrueImm);
- if (isSoImm) {
- Opc = ARM::MOVCCi;
- } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
- Opc = ARM::MOVCCi16;
- } else if (is_so_imm_not(TrueImm)) {
- TrueImm = ~TrueImm;
- Opc = ARM::MVNCCi;
- } else if (TrueVal.getNode()->hasOneUse() &&
- (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
- // Large immediate.
- Opc = ARM::MOVCCi32imm;
- }
-
- if (Opc) {
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
- }
-
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
- EVT VT = N->getValueType(0);
- SDValue FalseVal = N->getOperand(0);
- SDValue TrueVal = N->getOperand(1);
- SDValue CC = N->getOperand(2);
- SDValue CCR = N->getOperand(3);
- SDValue InFlag = N->getOperand(4);
- assert(CC.getOpcode() == ISD::Constant);
- assert(CCR.getOpcode() == ISD::Register);
- ARMCC::CondCodes CCVal =
- (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
-
- if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
- // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 18 cost = 1 size = 0
- if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- } else {
- SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Pred_so_imm>>:$true,
- // (imm:i32):$cc)
- // Emits: (MOVCCi:i32 GPR:i32:$false,
- // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
- // Pattern complexity = 10 cost = 1 size = 0
- if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- } else {
- SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- }
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 1 size = 0
- //
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 11 size = 0
- //
- // Also VMOVScc and VMOVDcc.
- SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
- unsigned Opc = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Illegal conditional move type!");
- case MVT::i32:
- Opc = Subtarget->isThumb()
- ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
- : ARM::MOVCCr;
- break;
- case MVT::f32:
- Opc = ARM::VMOVScc;
- break;
- case MVT::f64:
- Opc = ARM::VMOVDcc;
- break;
- }
- return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
-}
-
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2567,30 +2361,45 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
-SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+ unsigned Op16, unsigned Op32,
+ unsigned Op64) {
+ // Mostly direct translation to the given operations, except that we preserve
+ // the AtomicOrdering for use later on.
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+ EVT VT = AN->getMemoryVT();
+
+ unsigned Op;
+ SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
+ if (VT == MVT::i8)
+ Op = Op8;
+ else if (VT == MVT::i16)
+ Op = Op16;
+ else if (VT == MVT::i32)
+ Op = Op32;
+ else if (VT == MVT::i64) {
+ Op = Op64;
+ VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
+ } else
+ llvm_unreachable("Unexpected atomic operation");
+
SmallVector<SDValue, 6> Ops;
- Ops.push_back(Node->getOperand(1)); // Ptr
- Ops.push_back(Node->getOperand(2)); // Low part of Val1
- Ops.push_back(Node->getOperand(3)); // High part of Val1
- if (Opc == ARM::ATOMCMPXCHG6432) {
- Ops.push_back(Node->getOperand(4)); // Low part of Val2
- Ops.push_back(Node->getOperand(5)); // High part of Val2
- }
- Ops.push_back(Node->getOperand(0)); // Chain
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
- MVT::i32, MVT::i32, MVT::Other,
- Ops);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
- return ResNode;
+ for (unsigned i = 1; i < AN->getNumOperands(); ++i)
+ Ops.push_back(AN->getOperand(i));
+
+ Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
+ Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+
+ return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size());
}
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
switch (N->getOpcode()) {
default: break;
@@ -2882,8 +2691,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue(Chain.getNode(), Chain.getResNo()));
return NULL;
}
- case ARMISD::CMOV:
- return SelectCMOVOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -3457,31 +3264,90 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
- case ARMISD::ATOMOR64_DAG:
- return SelectAtomic64(N, ARM::ATOMOR6432);
- case ARMISD::ATOMXOR64_DAG:
- return SelectAtomic64(N, ARM::ATOMXOR6432);
- case ARMISD::ATOMADD64_DAG:
- return SelectAtomic64(N, ARM::ATOMADD6432);
- case ARMISD::ATOMSUB64_DAG:
- return SelectAtomic64(N, ARM::ATOMSUB6432);
- case ARMISD::ATOMNAND64_DAG:
- return SelectAtomic64(N, ARM::ATOMNAND6432);
- case ARMISD::ATOMAND64_DAG:
- return SelectAtomic64(N, ARM::ATOMAND6432);
- case ARMISD::ATOMSWAP64_DAG:
- return SelectAtomic64(N, ARM::ATOMSWAP6432);
- case ARMISD::ATOMCMPXCHG64_DAG:
- return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
-
- case ARMISD::ATOMMIN64_DAG:
- return SelectAtomic64(N, ARM::ATOMMIN6432);
- case ARMISD::ATOMUMIN64_DAG:
- return SelectAtomic64(N, ARM::ATOMUMIN6432);
- case ARMISD::ATOMMAX64_DAG:
- return SelectAtomic64(N, ARM::ATOMMAX6432);
- case ARMISD::ATOMUMAX64_DAG:
- return SelectAtomic64(N, ARM::ATOMUMAX6432);
+ case ISD::ATOMIC_LOAD:
+ if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
+ return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
+ else
+ break;
+
+ case ISD::ATOMIC_STORE:
+ if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
+ return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64);
+ else
+ break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_ADD_I8,
+ ARM::ATOMIC_LOAD_ADD_I16,
+ ARM::ATOMIC_LOAD_ADD_I32,
+ ARM::ATOMIC_LOAD_ADD_I64);
+ case ISD::ATOMIC_LOAD_SUB:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_SUB_I8,
+ ARM::ATOMIC_LOAD_SUB_I16,
+ ARM::ATOMIC_LOAD_SUB_I32,
+ ARM::ATOMIC_LOAD_SUB_I64);
+ case ISD::ATOMIC_LOAD_AND:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_AND_I8,
+ ARM::ATOMIC_LOAD_AND_I16,
+ ARM::ATOMIC_LOAD_AND_I32,
+ ARM::ATOMIC_LOAD_AND_I64);
+ case ISD::ATOMIC_LOAD_OR:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_OR_I8,
+ ARM::ATOMIC_LOAD_OR_I16,
+ ARM::ATOMIC_LOAD_OR_I32,
+ ARM::ATOMIC_LOAD_OR_I64);
+ case ISD::ATOMIC_LOAD_XOR:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_XOR_I8,
+ ARM::ATOMIC_LOAD_XOR_I16,
+ ARM::ATOMIC_LOAD_XOR_I32,
+ ARM::ATOMIC_LOAD_XOR_I64);
+ case ISD::ATOMIC_LOAD_NAND:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_NAND_I8,
+ ARM::ATOMIC_LOAD_NAND_I16,
+ ARM::ATOMIC_LOAD_NAND_I32,
+ ARM::ATOMIC_LOAD_NAND_I64);
+ case ISD::ATOMIC_LOAD_MIN:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_MIN_I8,
+ ARM::ATOMIC_LOAD_MIN_I16,
+ ARM::ATOMIC_LOAD_MIN_I32,
+ ARM::ATOMIC_LOAD_MIN_I64);
+ case ISD::ATOMIC_LOAD_MAX:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_MAX_I8,
+ ARM::ATOMIC_LOAD_MAX_I16,
+ ARM::ATOMIC_LOAD_MAX_I32,
+ ARM::ATOMIC_LOAD_MAX_I64);
+ case ISD::ATOMIC_LOAD_UMIN:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_UMIN_I8,
+ ARM::ATOMIC_LOAD_UMIN_I16,
+ ARM::ATOMIC_LOAD_UMIN_I32,
+ ARM::ATOMIC_LOAD_UMIN_I64);
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_UMAX_I8,
+ ARM::ATOMIC_LOAD_UMAX_I16,
+ ARM::ATOMIC_LOAD_UMAX_I32,
+ ARM::ATOMIC_LOAD_UMAX_I64);
+ case ISD::ATOMIC_SWAP:
+ return SelectAtomic(N,
+ ARM::ATOMIC_SWAP_I8,
+ ARM::ATOMIC_SWAP_I16,
+ ARM::ATOMIC_SWAP_I32,
+ ARM::ATOMIC_SWAP_I64);
+ case ISD::ATOMIC_CMP_SWAP:
+ return SelectAtomic(N,
+ ARM::ATOMIC_CMP_SWAP_I8,
+ ARM::ATOMIC_CMP_SWAP_I16,
+ ARM::ATOMIC_CMP_SWAP_I32,
+ ARM::ATOMIC_CMP_SWAP_I64);
}
return SelectCode(N);
@@ -3614,7 +3480,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
if(PairedReg.getNode()) {
OpChanged[OpChanged.size() -1 ] = true;
Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
- Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ if (IsTiedToChangedOp)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ else
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
Flag, MVT::i32);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index caec11e..76a0a83 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <utility>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -174,9 +175,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget->isTargetIOS()) {
// Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ Subtarget->hasARMOps()) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
@@ -421,7 +423,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ if (Subtarget->getTargetTriple().isiOS() &&
!Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
@@ -452,6 +454,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
@@ -564,16 +567,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- // Custom expand long extensions to vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
-
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
@@ -750,12 +743,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
- // FIXME: This should be checking for v6k, not just v6.
- if (Subtarget->hasDataBarrier() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
- // membarrier needs custom lowering; the rest are legal and handled
- // normally.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
+ // handled normally.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
@@ -768,11 +759,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
- // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
- setInsertFencesForAtomic(true);
+ // On v8, we have particularly efficient implementations of atomic fences
+ // if they can be combined with nearby atomic loads and stores.
+ if (!Subtarget->hasV8Ops()) {
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ }
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
} else {
+ // If there's anything we can use as a barrier, go through custom lowering
+ // for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
+ Subtarget->hasAnyDataBarrier() ? Custom : Expand);
+
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
@@ -869,6 +869,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
+
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+      // For iOS, we don't want the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
@@ -908,6 +920,44 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+ bool isThumb2, unsigned &LdrOpc,
+ unsigned &StrOpc) {
+ static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
+ {ARM::LDREXH, ARM::t2LDREXH},
+ {ARM::LDREX, ARM::t2LDREX},
+ {ARM::LDREXD, ARM::t2LDREXD}};
+ static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
+ {ARM::LDAEXH, ARM::t2LDAEXH},
+ {ARM::LDAEX, ARM::t2LDAEX},
+ {ARM::LDAEXD, ARM::t2LDAEXD}};
+ static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
+ {ARM::STREXH, ARM::t2STREXH},
+ {ARM::STREX, ARM::t2STREX},
+ {ARM::STREXD, ARM::t2STREXD}};
+ static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
+ {ARM::STLEXH, ARM::t2STLEXH},
+ {ARM::STLEX, ARM::t2STLEX},
+ {ARM::STLEXD, ARM::t2STLEXD}};
+
+ const unsigned (*LoadOps)[2], (*StoreOps)[2];
+ if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ LoadOps = LoadAcqs;
+ else
+ LoadOps = LoadBares;
+
+ if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ StoreOps = StoreRels;
+ else
+ StoreOps = StoreBares;
+
+ assert(isPowerOf2_32(Size) && Size <= 8 &&
+ "unsupported size for atomic binary op!");
+
+ LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
+ StrOpc = StoreOps[Log2_32(Size)][isThumb2];
+}
+
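
getExclusiveOperation above encodes two independent choices: acquire-flavoured orderings (acquire, acq_rel, seq_cst) select the LDAEX-family loads, release-flavoured ones select the STLEX-family stores, and the byte size indexes the B/H/word/D variant. Below is a standalone sketch of that classification with strings standing in for the opcode enums; it is an illustration, not the LLVM helper itself.

    #include <cstdio>
    #include <string>

    enum Ordering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };

    static void pickExclusiveOps(unsigned Size, Ordering Ord, std::string &Ld,
                                 std::string &St) {
      bool AcqLoad  = Ord == Acquire || Ord == AcquireRelease || Ord == SeqCst;
      bool RelStore = Ord == Release || Ord == AcquireRelease || Ord == SeqCst;
      // Size is 1, 2, 4 or 8 bytes; suffixes mirror LDREXB/LDREXH/LDREX/LDREXD.
      const char *Suffix[] = {"B", "H", "", "D"};
      unsigned Idx = Size == 1 ? 0 : Size == 2 ? 1 : Size == 4 ? 2 : 3;
      Ld = std::string(AcqLoad ? "LDAEX" : "LDREX") + Suffix[Idx];
      St = std::string(RelStore ? "STLEX" : "STREX") + Suffix[Idx];
    }

    int main() {
      std::string Ld, St;
      pickExclusiveOps(4, SeqCst, Ld, St);              // a 32-bit seq_cst atomic
      std::printf("%s / %s\n", Ld.c_str(), St.c_str()); // prints "LDAEX / STLEX"
      return 0;
    }
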
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
@@ -970,6 +1020,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMN: return "ARMISD::CMN";
@@ -1009,7 +1060,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
- case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
@@ -1068,6 +1118,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+  case ARMISD::VMAXNM: return "ARMISD::VMAXNM";
+  case ARMISD::VMINNM: return "ARMISD::VMINNM";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
@@ -1092,19 +1144,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
-
- case ARMISD::ATOMADD64_DAG: return "ATOMADD64_DAG";
- case ARMISD::ATOMSUB64_DAG: return "ATOMSUB64_DAG";
- case ARMISD::ATOMOR64_DAG: return "ATOMOR64_DAG";
- case ARMISD::ATOMXOR64_DAG: return "ATOMXOR64_DAG";
- case ARMISD::ATOMAND64_DAG: return "ATOMAND64_DAG";
- case ARMISD::ATOMNAND64_DAG: return "ATOMNAND64_DAG";
- case ARMISD::ATOMSWAP64_DAG: return "ATOMSWAP64_DAG";
- case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG";
- case ARMISD::ATOMMIN64_DAG: return "ATOMMIN64_DAG";
- case ARMISD::ATOMUMIN64_DAG: return "ATOMUMIN64_DAG";
- case ARMISD::ATOMMAX64_DAG: return "ATOMMAX64_DAG";
- case ARMISD::ATOMUMAX64_DAG: return "ATOMUMAX64_DAG";
}
}
@@ -1536,7 +1575,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, false, 0);
+ false, false, false,
+ DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
@@ -1745,24 +1785,26 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
- if (isThisReturn) {
- // For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
- if (!Mask) {
- // Set isThisReturn to false if the calling convention is not one that
- // allows 'returned' to be modeled in this way, so LowerCallResult does
- // not try to pass 'this' straight through
- isThisReturn = false;
+ if (!isTailCall) {
+ const uint32_t *Mask;
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
+ Mask = ARI->getCallPreservedMask(CallConv);
+ }
+ } else
Mask = ARI->getCallPreservedMask(CallConv);
- }
- } else
- Mask = ARI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -1933,6 +1975,12 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isVarArg && !Outs.empty())
return false;
+ // Exception-handling functions need a special set of instructions to indicate
+ // a return to the hardware. Tail-calling another function would probably
+ // break this.
+ if (CallerF->hasFnAttribute("interrupt"))
+ return false;
+
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
@@ -2061,6 +2109,39 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
isVarArg));
}
+static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
+ SDLoc DL, SelectionDAG &DAG) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+
+ StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
+
+ // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
+ // version of the "preferred return address". These offsets affect the return
+ // instruction if this is a return from PL1 without hypervisor extensions.
+ // IRQ/FIQ: +4 "subs pc, lr, #4"
+ // SWI: 0 "subs pc, lr, #0"
+ // ABORT: +4 "subs pc, lr, #4"
+ // UNDEF: +4/+2 "subs pc, lr, #0"
+  // UNDEF varies depending on whether the exception came from ARM or Thumb
+ // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
+
+ int64_t LROffset;
+ if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
+ IntKind == "ABORT")
+ LROffset = 4;
+ else if (IntKind == "SWI" || IntKind == "UNDEF")
+ LROffset = 0;
+ else
+ report_fatal_error("Unsupported interrupt attribute. If present, value "
+ "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
+
+ RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
+
+ return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
+ RetOps.data(), RetOps.size());
+}
+
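
LowerInterruptReturn bakes the ARM ARM return-offset table straight into the code: IRQ, FIQ, ABORT and an unannotated handler get "subs pc, lr, #4", while SWI and UNDEF get "#0" (with UNDEF deliberately approximated). A tiny standalone restatement of that mapping, using plain strings instead of the function-attribute plumbing:

    #include <cstdio>
    #include <string>

    // Map the "interrupt" attribute value to the immediate used in
    // "subs pc, lr, #N"; -1 stands for the unsupported-value case, where the
    // real lowering calls report_fatal_error instead.
    static int interruptLROffset(const std::string &Kind) {
      if (Kind.empty() || Kind == "IRQ" || Kind == "FIQ" || Kind == "ABORT")
        return 4;
      if (Kind == "SWI" || Kind == "UNDEF")
        return 0;
      return -1;
    }

    int main() {
      std::printf("IRQ   -> subs pc, lr, #%d\n", interruptLROffset("IRQ"));
      std::printf("SWI   -> subs pc, lr, #%d\n", interruptLROffset("SWI"));
      std::printf("UNDEF -> subs pc, lr, #%d\n", interruptLROffset("UNDEF"));
      return 0;
    }
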
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -2146,6 +2227,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
+ // CPUs which aren't M-class use a special sequence to return from
+ // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
+ // though we use "subs pc, lr, #N").
+ //
+ // M-class CPUs actually use a normal return sequence with a special
+ // (hardware-provided) value in LR, so the normal code path works.
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
+ !Subtarget->isMClass()) {
+ if (Subtarget->isThumb1Only())
+ report_fatal_error("interrupt attribute is not supported in Thumb1");
+ return LowerInterruptReturn(RetOps, dl, DAG);
+ }
+
return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
RetOps.data(), RetOps.size());
}
@@ -2202,7 +2296,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
- if (UI->getOpcode() != ARMISD::RET_FLAG)
+ if (UI->getOpcode() != ARMISD::RET_FLAG &&
+ UI->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
@@ -2589,7 +2684,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
}
@@ -2597,14 +2692,18 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
unsigned Domain = ARM_MB::ISH;
- if (Subtarget->isSwift() && Ord == Release) {
+ if (Subtarget->isMClass()) {
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = ARM_MB::SY;
+ } else if (Subtarget->isSwift() && Ord == Release) {
// Swift happens to implement ISHST barriers in a way that's compatible with
// Release semantics but weaker than ISH so we'd be fools not to use
// it. Beware: other processors probably don't!
Domain = ARM_MB::ISHST;
}
- return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
DAG.getConstant(Domain, MVT::i32));
}
@@ -3177,6 +3276,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SelectTrue, SelectFalse, ISD::SETNE);
}
+static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
+ if (CC == ISD::SETNE)
+ return ISD::SETEQ;
+ return ISD::getSetCCSwappedOperands(CC);
+}
+
+static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ bool &swpCmpOps, bool &swpVselOps) {
+ // Start by selecting the GE condition code for opcodes that return true for
+ // 'equality'
+ if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
+ CC == ISD::SETULE)
+ CondCode = ARMCC::GE;
+
+ // and GT for opcodes that return false for 'equality'.
+ else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ CondCode = ARMCC::GT;
+
+ // Since we are constrained to GE/GT, if the opcode contains 'less', we need
+ // to swap the compare operands.
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ swpCmpOps = true;
+
+ // Both GT and GE are ordered comparisons, and return false for 'unordered'.
+ // If we have an unordered opcode, we need to swap the operands to the VSEL
+ // instruction (effectively negating the condition).
+ //
+ // This also has the effect of swapping which one of 'less' or 'greater'
+ // returns true, so we also swap the compare operands. It also switches
+ // whether we return true for 'equality', so we compensate by picking the
+ // opposite condition code to our original choice.
+ if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
+ CC == ISD::SETUGT) {
+ swpCmpOps = !swpCmpOps;
+ swpVselOps = !swpVselOps;
+ CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
+ }
+
+ // 'ordered' is 'anything but unordered', so use the VS condition code and
+ // swap the VSEL operands.
+ if (CC == ISD::SETO) {
+ CondCode = ARMCC::VS;
+ swpVselOps = true;
+ }
+
+ // 'unordered or not equal' is 'anything but equal', so use the EQ condition
+ // code and swap the VSEL operands.
+ if (CC == ISD::SETUNE) {
+ CondCode = ARMCC::EQ;
+ swpVselOps = true;
+ }
+}
+
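
One way to sanity-check the table logic in checkVSELConstraints: for SETULE the code first picks GE and marks the compare operands for swapping, then the 'unordered' clause flips both swap flags and bumps GE to GT, leaving GT with only the VSEL operands swapped. The standalone check below (plain doubles, no SelectionDAG) verifies that this rewrite preserves the select semantics, NaNs included; it illustrates the reasoning and is not the LLVM code.

    #include <cassert>
    #include <cmath>

    // "x ule y ? t : f", written directly.
    static double viaULE(double X, double Y, double T, double F) {
      return (std::isnan(X) || std::isnan(Y) || X <= Y) ? T : F;
    }

    // The same select rewritten the way the VSEL path does it for SETULE:
    // an ordered greater-than compare (false on NaN) with the select operands
    // swapped and the compare operands left alone.
    static double viaGTSwapped(double X, double Y, double T, double F) {
      bool GT = !(std::isnan(X) || std::isnan(Y)) && X > Y;
      return GT ? F : T;
    }

    int main() {
      const double NaN = std::nan("");
      const double Cases[][2] = {{1, 2}, {2, 1}, {1, 1}, {NaN, 1}, {1, NaN}};
      for (const auto &C : Cases)
        assert(viaULE(C[0], C[1], 10, 20) == viaGTSwapped(C[0], C[1], 10, 20));
      return 0;
    }
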
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3187,15 +3341,66 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
+ // Try to generate VSEL on ARMv8.
+ // The VSEL instruction can't use all the usual ARM condition
+ // codes: it only has two bits to select the condition code, so it's
+ // constrained to use only GE, GT, VS and EQ.
+ //
+ // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+ // swap the operands of the previous compare instruction (effectively
+ // inverting the compare condition, swapping 'less' and 'greater') and
+ // sometimes need to swap the operands to the VSEL (which inverts the
+ // condition in the sense of firing whenever the previous condition didn't)
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+ CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+ CC = getInverseCCForVSEL(CC);
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
+ // Try to generate VSEL on ARMv8.
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // same operands, as follows:
+ // c = fcmp [ogt, olt, ugt, ult] a, b
+ // select c, a, b
+ // We only do this in unsafe-fp-math, because signed zeros and NaNs are
+ // handled differently than the original code sequence.
+ if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
+ RHS == FalseVal) {
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ }
+
+ bool swpCmpOps = false;
+ bool swpVselOps = false;
+ checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+ if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+ CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+ if (swpCmpOps)
+ std::swap(LHS, RHS);
+ if (swpVselOps)
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
@@ -3627,47 +3832,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
-/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
-/// and size(DestVec) > 128-bits.
-/// This is achieved by doing the one extension from the SrcVec, splitting the
-/// result, extending these parts, and then concatenating these into the
-/// destination.
-static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
- SDValue Op = N->getOperand(0);
- EVT SrcVT = Op.getValueType();
- EVT DestVT = N->getValueType(0);
-
- assert(DestVT.getSizeInBits() > 128 &&
- "Custom sext/zext expansion needs >128-bit vector.");
- // If this is a normal length extension, use the default expansion.
- if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
- SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
- return SDValue();
-
- SDLoc dl(N);
- unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
- unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
- unsigned NumElts = SrcVT.getVectorNumElements();
- LLVMContext &Ctx = *DAG.getContext();
- SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
-
- EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts);
- EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts/2);
- EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
- NumElts/2);
-
- Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
- SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(0));
- SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(NumElts/2));
- ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
- ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
-}
-
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -4271,17 +4435,25 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
- if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ if (!ST->hasVFP3())
return SDValue();
+ bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
- assert(Op.getValueType() == MVT::f32 &&
- "ConstantFP custom lowering should only occur for f32.");
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
- int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
+
if (ImmVal != -1) {
+ if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
+ // We have code in place to select a valid ConstantFP already, no need to
+ // do any mangling.
+ return Op;
+ }
+
+ // It's a float and we are trying to use NEON operations where
+ // possible. Lower it to a splat followed by an extract.
SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
@@ -4290,15 +4462,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(0, MVT::i32));
}
- // If that fails, try a VMOV.i32
+  // The rest of our options are NEON only, so make sure that's allowed before
+  // proceeding.
+ if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
+ return SDValue();
+
EVT VMovVT;
- unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
- SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
- VMOVModImm);
+ uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
+
+ // It wouldn't really be worth bothering for doubles except for one very
+ // important value, which does happen to match: 0.0. So make sure we don't do
+ // anything stupid.
+ if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
+ return SDValue();
+
+ // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
+ SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
@@ -4306,11 +4494,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
}
// Finally, try a VMVN.i32
- NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
- VMVNModImm);
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
@@ -5769,6 +5962,70 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin());
+
+  // For iOS, we want to call an alternative entry point: __sincos_stret,
+  // where the return values are passed via sret.
+ SDLoc dl(Op);
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Pair of floats / doubles used to pass the result.
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+
+ // Create stack object for sret.
+ const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
+ const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+                       CallingConv::C, /*isTailCall=*/false,
+                       /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
+ MachinePointerInfo(), false, false, false, 0);
+
+ // Address of cos field.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+ DAG.getIntPtrConstant(ArgVT.getStoreSize()));
+ SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+ MachinePointerInfo(), false, false, false, 0);
+
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
+ LoadSin.getValue(0), LoadCos.getValue(0));
+}
+
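
The sret layout LowerFSINCOS assumes is simply {sin, cos} in that order: the first load reads the sin value at offset 0 and the second reads cos at offset ArgVT.getStoreSize(). The sketch below illustrates that contract; __sincos_stret itself is the Darwin runtime entry point named above, so the local stand-in is illustrative only, not the real symbol.

    #include <cmath>
    #include <cstdio>

    // The two-field layout assumed for the sret result: sin at offset 0 (the
    // first load in LowerFSINCOS), cos at offset sizeof(double) (the second
    // load, ArgVT.getStoreSize() bytes past the sret pointer).
    struct SinCosResult { double Sin; double Cos; };

    // Local stand-in for the Darwin __sincos_stret entry point.
    static void sincosStretLike(SinCosResult *Ret, double X) {
      Ret->Sin = std::sin(X);
      Ret->Cos = std::cos(X);
    }

    int main() {
      SinCosResult R;
      sincosStretLike(&R, 0.5);
      std::printf("sin=%f cos=%f\n", R.Sin, R.Cos);
      return 0;
    }
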
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@@ -5781,32 +6038,28 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
static void
ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG, unsigned NewOp) {
+ SelectionDAG &DAG) {
SDLoc dl(Node);
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
SmallVector<SDValue, 6> Ops;
Ops.push_back(Node->getOperand(0)); // Chain
Ops.push_back(Node->getOperand(1)); // Ptr
- // Low part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0)));
- // High part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1)));
- if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
- // High part of Val1
+  for (unsigned i = 2; i < Node->getNumOperands(); ++i) {
+ // Low part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(0)));
- // High part of Val2
+ Node->getOperand(i), DAG.getIntPtrConstant(0)));
+ // High part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ Node->getOperand(i), DAG.getIntPtrConstant(1)));
}
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
+ DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
+ cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(),
+ AN->getSynchScope());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
@@ -5904,6 +6157,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
}
@@ -5921,10 +6175,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- Res = ExpandVectorExtension(N, DAG);
- break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
@@ -5932,41 +6182,21 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
- return;
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
- return;
case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
- return;
case ISD::ATOMIC_CMP_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
- return;
case ISD::ATOMIC_LOAD_MIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_MAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ ReplaceATOMIC_OP_64(N, Results, DAG);
return;
}
if (Res.getNode())
@@ -5986,6 +6216,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6001,21 +6232,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6095,6 +6312,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6102,24 +6320,11 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -6203,6 +6408,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
unsigned oldval = dest;
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6210,24 +6416,20 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
extendOpc = 0;
break;
}
@@ -6271,7 +6473,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRnopcRegClass);
+ if (!isThumb2)
+ MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
@@ -6309,7 +6514,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Op1, unsigned Op2,
bool NeedsCarry, bool IsCmpxchg,
bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6317,11 +6522,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction::iterator It = BB;
++It;
+ bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64);
+ unsigned offset = (isStore ? -2 : 0);
unsigned destlo = MI->getOperand(0).getReg();
unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- unsigned vallo = MI->getOperand(3).getReg();
- unsigned valhi = MI->getOperand(4).getReg();
+ unsigned ptr = MI->getOperand(offset+2).getReg();
+ unsigned vallo = MI->getOperand(offset+3).getReg();
+ unsigned valhi = MI->getOperand(offset+4).getReg();
+ unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5);
+  AtomicOrdering Ord =
+      static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6330,8 +6539,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(vallo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
}
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax)
@@ -6371,21 +6585,23 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// fallthrough --> exitMBB
BB = loopMBB;
- // Load
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
+ if (!isStore) {
+ // Load
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
}
unsigned StoreLo, StoreHi;
@@ -6437,7 +6653,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// Store
if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StoreLo).addReg(StoreHi).addReg(ptr));
} else {
// Marshal a pair...
@@ -6455,7 +6673,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
.addImm(ARM::gsub_1);
// ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StorePair).addReg(ptr));
}
// Cmp+jump
@@ -6476,6 +6694,51 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI,
+                                    MachineBasicBlock *BB) const {
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc));
+
+ if (isThumb2) {
+ MIB.addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr);
+
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
+
+ // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+ AddDefaultPred(MIB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -7007,8 +7270,109 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
-MachineBasicBlock *ARMTargetLowering::
-EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+/// Return the load opcode for a given load size. If load size >= 8,
+/// a NEON opcode will be returned.
+static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
+ if (LdSize >= 8)
+ return LdSize == 16 ? ARM::VLD1q32wb_fixed
+ : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
+ if (IsThumb1)
+ return LdSize == 4 ? ARM::tLDRi
+ : LdSize == 2 ? ARM::tLDRHi
+ : LdSize == 1 ? ARM::tLDRBi : 0;
+ if (IsThumb2)
+ return LdSize == 4 ? ARM::t2LDR_POST
+ : LdSize == 2 ? ARM::t2LDRH_POST
+ : LdSize == 1 ? ARM::t2LDRB_POST : 0;
+ return LdSize == 4 ? ARM::LDR_POST_IMM
+ : LdSize == 2 ? ARM::LDRH_POST
+ : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
+}
+
+/// Return the store opcode for a given store size. If store size >= 8,
+/// a NEON opcode will be returned.
+static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
+ if (StSize >= 8)
+ return StSize == 16 ? ARM::VST1q32wb_fixed
+ : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
+ if (IsThumb1)
+ return StSize == 4 ? ARM::tSTRi
+ : StSize == 2 ? ARM::tSTRHi
+ : StSize == 1 ? ARM::tSTRBi : 0;
+ if (IsThumb2)
+ return StSize == 4 ? ARM::t2STR_POST
+ : StSize == 2 ? ARM::t2STRH_POST
+ : StSize == 1 ? ARM::t2STRB_POST : 0;
+ return StSize == 4 ? ARM::STR_POST_IMM
+ : StSize == 2 ? ARM::STRH_POST
+ : StSize == 1 ? ARM::STRB_POST_IMM : 0;
+}
+
+/// Emit a post-increment load operation with the given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned LdSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
+ assert(LdOpc != 0 && "Should have a load opcode");
+ if (LdSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(0));
+ } else if (IsThumb1) {
+ // load + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(LdSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(LdSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addReg(0).addImm(LdSize));
+ }
+}
+
+/// Emit a post-increment store operation with the given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned StSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
+ assert(StOpc != 0 && "Should have a store opcode");
+ if (StSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn).addImm(0).addReg(Data));
+ } else if (IsThumb1) {
+ // store + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(StSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addImm(StSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addReg(0)
+ .addImm(StSize));
+ }
+}
+
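
The emitPostLd/emitPostSt helpers above are combined by EmitStructByval (below) into a simple shape: the bulk of the copy advances UnitSize bytes per post-increment load/store pair, and the SizeVal % UnitSize leftover bytes are moved one at a time with the byte-sized variants. A plain C++ stand-in for that structure (no MachineInstr building):

    #include <cstdio>
    #include <cstring>

    // Bulk copy in UnitSize-byte chunks (one emitPostLd/emitPostSt pair per
    // chunk), then move the SizeVal % UnitSize leftover bytes one at a time,
    // matching the LDRB_POST/STRB_POST epilogue.
    static void byvalCopy(char *Dst, const char *Src, unsigned SizeVal,
                          unsigned UnitSize) {
      unsigned BytesLeft = SizeVal % UnitSize;
      unsigned LoopSize = SizeVal - BytesLeft;
      for (unsigned I = 0; I < LoopSize; I += UnitSize)
        std::memcpy(Dst + I, Src + I, UnitSize);
      for (unsigned I = LoopSize; I < SizeVal; ++I)
        Dst[I] = Src[I];
    }

    int main() {
      char Src[11] = "0123456789", Dst[11] = {0};
      byvalCopy(Dst, Src, 11, 4); // LoopSize = 8, BytesLeft = 3
      std::printf("%s\n", Dst);
      return 0;
    }
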
+MachineBasicBlock *
+ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
@@ -7023,23 +7387,18 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned Align = MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc, strOpc, UnitSize = 0;
+ unsigned UnitSize = 0;
+ const TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *VecTRC = 0;
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- const TargetRegisterClass *TRC_Vec = 0;
+ bool IsThumb1 = Subtarget->isThumb1Only();
+ bool IsThumb2 = Subtarget->isThumb2();
if (Align & 1) {
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
UnitSize = 1;
} else if (Align & 2) {
- ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
- strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
@@ -7047,27 +7406,27 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
- if ((Align % 16 == 0) && SizeVal >= 16) {
- ldrOpc = ARM::VLD1q32wb_fixed;
- strOpc = ARM::VST1q32wb_fixed;
+ if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
- }
- else if ((Align % 8 == 0) && SizeVal >= 8) {
- ldrOpc = ARM::VLD1d32wb_fixed;
- strOpc = ARM::VST1d32wb_fixed;
+ else if ((Align % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
- }
}
// Can't use NEON instructions.
- if (UnitSize == 0) {
- ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ if (UnitSize == 0)
UnitSize = 4;
- }
}
+ // Select the correct opcode and register class for unit size load/store
+ bool IsNeon = UnitSize >= 8;
+ TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
+ : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ if (IsNeon)
+ VecTRC = UnitSize == 16
+ ? (const TargetRegisterClass *)&ARM::DPairRegClass
+ : UnitSize == 8
+ ? (const TargetRegisterClass *)&ARM::DPRRegClass
+ : 0;
+
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -7078,34 +7437,13 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(destIn).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
@@ -7113,30 +7451,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn)
- .addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
@@ -7177,17 +7499,16 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- unsigned VReg1 = varEnd;
+ if (IsThumb2) {
+ unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
- VReg1 = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(LoopSize & 0xFFFF));
+ Vtmp = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(VReg1)
- .addImm(LoopSize >> 16));
+ .addReg(Vtmp).addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -7199,10 +7520,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
- .addReg(varEnd, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
+ if (IsThumb1)
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ else
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
}
BB->addSuccessor(loopMBB);
@@ -7231,39 +7554,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(destPhi).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
+ IsThumb1, IsThumb2);
// Decrement loop variable by UnitSize.
- MachineInstrBuilder MIB = BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
-
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ if (IsThumb1) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(varPhi).addImm(UnitSize);
+ AddDefaultPred(MIB);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ }
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
@@ -7272,34 +7586,19 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Add epilogue to handle BytesLeft.
BB = exitMBB;
MachineInstr *StartOfExit = exitMBB->begin();
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
@@ -7449,46 +7748,49 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+ case ARM::ATOMIC_LOAD_I64:
+ return EmitAtomicLoad64(MI, BB);
- case ARM::ATOMADD6432:
+ case ARM::ATOMIC_LOAD_ADD_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMSUB6432:
+ case ARM::ATOMIC_LOAD_SUB_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMOR6432:
+ case ARM::ATOMIC_LOAD_OR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMXOR6432:
+ case ARM::ATOMIC_LOAD_XOR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMAND6432:
+ case ARM::ATOMIC_LOAD_AND_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMSWAP6432:
+ case ARM::ATOMIC_STORE_I64:
+ case ARM::ATOMIC_SWAP_I64:
return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMCMPXCHG6432:
+ case ARM::ATOMIC_CMP_SWAP_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMMIN6432:
+ case ARM::ATOMIC_LOAD_MIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMMAX6432:
+ case ARM::ATOMIC_LOAD_MAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMUMIN6432:
+ case ARM::ATOMIC_LOAD_UMIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMUMAX6432:
+ case ARM::ATOMIC_LOAD_UMAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
@@ -8197,6 +8499,13 @@ static SDValue PerformSUBCombine(SDNode *N,
/// is faster than
/// vadd d3, d0, d1
/// vmul d3, d3, d2
+// However, for (A + B) * (A + B),
+// vadd d2, d0, d1
+// vmul d3, d0, d2
+// vmla d3, d1, d2
+// is slower than
+// vadd d2, d0, d1
+// vmul d3, d2, d2
static SDValue PerformVMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -8216,6 +8525,9 @@ static SDValue PerformVMULCombine(SDNode *N,
std::swap(N0, N1);
}
+ if (N0 == N1)
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue N00 = N0->getOperand(0);
@@ -10548,6 +10860,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'r':
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
@@ -10556,6 +10870,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 44c769f..90facdd 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -52,6 +52,7 @@ namespace llvm {
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
RET_FLAG, // Return with a flag operand.
+ INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand.
PIC_ADD, // Add with a PC operand and a PIC label.
@@ -94,7 +95,6 @@ namespace llvm {
DYN_ALLOC, // Dynamic allocation on the stack.
- MEMBARRIER, // Memory barrier (DMB)
MEMBARRIER_MCR, // Memory barrier (MCR)
PRELOAD, // Preload
@@ -186,6 +186,8 @@ namespace llvm {
// Floating-point max and min:
FMAX,
FMIN,
+ VMAXNM,
+ VMINNM,
// Bit-field insert
BFI,
@@ -222,21 +224,7 @@ namespace llvm {
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD,
-
- // 64-bit atomic ops (value split into two registers)
- ATOMADD64_DAG,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMSWAP64_DAG,
- ATOMCMPXCHG64_DAG,
- ATOMMIN64_DAG,
- ATOMUMIN64_DAG,
- ATOMMAX64_DAG,
- ATOMUMAX64_DAG
+ VST4LN_UPD
};
}
@@ -375,6 +363,12 @@ namespace llvm {
/// be used for loads / stores from the global.
virtual unsigned getMaximalGlobalOffset() const;
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+ // Addrspacecasts are always noops.
+ return true;
+ }
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -460,6 +454,7 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
@@ -573,6 +568,8 @@ namespace llvm {
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const;
+ MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 1349476..f93504f 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -155,6 +155,16 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
let DecoderMethod = "DecodePredicateOperand";
}
+// Selectable predicate operand for CMOV instructions. We can't use a normal
+// predicate because the default values interfere with instruction selection. In
+// all other respects it is identical though: pseudo-instruction expansion
+// relies on the MachineOperands being compatible.
+def cmovpred : Operand<i32>, PredicateOp,
+ ComplexPattern<i32, 2, "SelectCMOVPred"> {
+ let MIOperandInfo = (ops i32imm, i32imm);
+ let PrintMethod = "printPredicateOperand";
+}
+
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
def CCOutOperand : AsmOperandClass { let Name = "CCOut"; }
def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
@@ -237,6 +247,8 @@ class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
+class VFP2DPInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2,HasDPVFP]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
@@ -490,8 +502,7 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
-// Atomic load/store instructions
-class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+class AIldr_ex_or_acq<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
@@ -502,23 +513,52 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{20} = 1;
let Inst{19-16} = addr;
let Inst{15-12} = Rt;
- let Inst{11-0} = 0b111110011111;
+ let Inst{11-10} = 0b11;
+ let Inst{9-8} = opcod2;
+ let Inst{7-0} = 0b10011111;
}
-class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+class AIstr_ex_or_rel<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
- bits<4> Rd;
bits<4> Rt;
bits<4> addr;
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
let Inst{20} = 0;
let Inst{19-16} = addr;
- let Inst{15-12} = Rd;
- let Inst{11-4} = 0b11111001;
+ let Inst{11-10} = 0b11;
+ let Inst{9-8} = opcod2;
+ let Inst{7-4} = 0b1001;
let Inst{3-0} = Rt;
}
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b11, oops, iops, itin, opc, asm, pattern>;
+
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b11, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{15-12} = Rd;
+}
+
+// Exclusive load/store instructions
+
+class AIldaex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]>;
+
+class AIstlex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]> {
+ bits<4> Rd;
+ let Inst{15-12} = Rd;
+}
+
class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
: AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> {
bits<4> Rt;
@@ -535,6 +575,18 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
let Unpredictable{11-8} = 0b1111;
let DecoderMethod = "DecodeSwap";
}
+// Acquire/Release load/store instructions
+class AIldracq<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]>;
+
+class AIstrrel<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]> {
+ let Inst{15-12} = 0b1111;
+}
// addrmode1 instructions
class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1520,6 +1572,8 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
let Inst{8} = 1; // Double precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Double precision, unary, not-predicated
@@ -1572,6 +1626,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{8} = 1; // Double precision
let Inst{6} = op6;
let Inst{4} = op4;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// FP, binary, not predicated
@@ -1601,6 +1657,8 @@ class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
let Inst{8} = 1; // double precision
let Inst{6} = opcod3;
let Inst{4} = 0;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Single precision, unary, predicated
@@ -1965,7 +2023,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
}
// Same as N2V but not predicated.
-class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
dag oops, dag iops, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern>
: NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin,
@@ -1982,7 +2040,7 @@ class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
// Encode constant bits
let Inst{27-23} = 0b00111;
let Inst{21-20} = 0b11;
- let Inst{19-18} = 0b10;
+ let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
let Inst{11} = 0;
let Inst{10-8} = op10_8;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 8cdb853..df867b4 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -119,17 +120,29 @@ namespace {
MachineBasicBlock &FirstMBB = MF.front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ unsigned TempReg =
+ MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
ARM::t2LDRpci : ARM::LDRcp;
const TargetInstrInfo &TII = *TM->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
- TII.get(Opc), GlobalBaseReg)
+ TII.get(Opc), TempReg)
.addConstantPoolIndex(Idx);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
AddDefaultPred(MIB);
+ // Fix the GOT address by adding pc.
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD
+ : ARM::PICADD;
+ MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
+ .addReg(TempReg)
+ .addImm(ARMPCLabelIndex);
+ if (Opc == ARM::PICADD)
+ AddDefaultPred(MIB);
+
+
return true;
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c243402..2042c04 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -71,6 +71,9 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
@@ -118,7 +121,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
+def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
@@ -162,8 +166,6 @@ def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp,
[SDNPHasChain, SDNPSideEffect]>;
-def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
- [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain, SDNPSideEffect]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
@@ -174,9 +176,11 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
+def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -189,6 +193,9 @@ def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
AssemblerPredicate<"HasV6Ops", "armv6">;
def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
+ AssemblerPredicate<"HasV6MOps",
+ "armv6m or armv6t2">;
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
@@ -196,6 +203,8 @@ def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
AssemblerPredicate<"HasV7Ops", "armv7">;
def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
AssemblerPredicate<"HasV8Ops", "armv8">;
+def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
+ AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -203,16 +212,23 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
AssemblerPredicate<"FeatureVFP3", "VFP3">;
def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
AssemblerPredicate<"FeatureVFP4", "VFP4">;
-def HasV8FP : Predicate<"Subtarget->hasV8FP()">,
- AssemblerPredicate<"FeatureV8FP", "V8FP">;
+def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">,
+ AssemblerPredicate<"!FeatureVFPOnlySP",
+ "double precision VFP">;
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
+ AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
+ AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasCRC : Predicate<"Subtarget->hasCRC()">,
+ AssemblerPredicate<"FeatureCRC", "crc">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv", "divide">;
+ AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
- AssemblerPredicate<"FeatureHWDivARM">;
+ AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
@@ -237,10 +253,10 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
AssemblerPredicate<"ModeThumb,FeatureThumb2",
"thumb2">;
def IsMClass : Predicate<"Subtarget->isMClass()">,
- AssemblerPredicate<"FeatureMClass", "armv7m">;
-def IsARClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass", "armv*m">;
+def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
AssemblerPredicate<"!FeatureMClass",
- "armv7a/r">;
+ "!armv*m">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb", "arm-mode">;
def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
@@ -587,17 +603,6 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
-/// imm0_4 predicate - Immediate in the range [0,4].
-def Imm0_4AsmOperand : ImmAsmOperand
-{
- let Name = "Imm0_4";
- let DiagnosticType = "ImmRange0_4";
-}
-def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> {
- let ParserMatchClass = Imm0_4AsmOperand;
- let DecoderMethod = "DecodeImm0_4";
-}
-
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -677,6 +682,15 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63AsmOperand;
}
+/// imm0_239 predicate - Immediate in the range [0,239].
+def Imm0_239AsmOperand : ImmAsmOperand {
+ let Name = "Imm0_239";
+ let DiagnosticType = "ImmRange0_239";
+}
+def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> {
+ let ParserMatchClass = Imm0_239AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
@@ -708,6 +722,11 @@ def imm0_65535_expr : Operand<i32> {
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
+def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; }
+def imm256_65535_expr : Operand<i32> {
+ let ParserMatchClass = Imm256_65535ExprAsmOperand;
+}
+
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
@@ -1665,53 +1684,11 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
-// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These pseudos use a hand-written selection code).
-let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
-def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
- GPR:$set1, GPR:$set2),
- NoItinerary, []>;
-def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-}
-
-def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
+def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
"hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
- bits<3> imm;
- let Inst{27-3} = 0b0011001000001111000000000;
- let Inst{2-0} = imm;
+ bits<8> imm;
+ let Inst{27-8} = 0b00110010000011110000;
+ let Inst{7-0} = imm;
}
def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
@@ -1719,6 +1696,9 @@ def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
+
+def : Pat<(int_arm_sevl), (HINT 5)>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
@@ -1746,6 +1726,16 @@ def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
let Inst{7-4} = 0b0111;
}
+def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010000;
+ let Inst{31-28} = 0xe; // AL
+ let Inst{7-4} = 0b0111;
+}
+
// Change Processor State
// FIXME: We should use InstAlias to handle the optional operands.
class CPS<dag iops, string asm_ops>
@@ -1820,7 +1810,7 @@ defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
- "setend\t$end", []>, Requires<[IsARM]> {
+ "setend\t$end", []>, Requires<[IsARM]>, Deprecated<HasV8Ops> {
bits<1> end;
let Inst{31-10} = 0b1111000100000001000000;
let Inst{9} = end;
@@ -1953,6 +1943,12 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001101000001111000000001110;
}
+
+ // Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets
+ // the user-space one).
+ def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p),
+ 4, IIC_Br,
+ [(ARMintretflag imm:$offset)]>;
}
// Indirect branches
@@ -2283,6 +2279,13 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
[]>, Requires<[IsARM, HasV5TE]>;
}
+def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "lda", "\t$Rt, $addr", []>;
+def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldab", "\t$Rt, $addr", []>;
+def LDAH : AIldracq<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldah", "\t$Rt, $addr", []>;
+
// Indexed loads
multiclass AI2_ldridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
@@ -2825,6 +2828,12 @@ multiclass AI3strT<bits<4> op, string opc> {
defm STRHT : AI3strT<0b1011, "strht">;
+def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stl", "\t$Rt, $addr", []>;
+def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlb", "\t$Rt, $addr", []>;
+def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlh", "\t$Rt, $addr", []>;
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
@@ -4014,6 +4023,45 @@ def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
(PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;
//===----------------------------------------------------------------------===//
+// CRC Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W} 0x04C11DB7
+// + CRC32C{B,H,W} 0x1EDC6F41
+//
+
+class AI_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+ : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary,
+ !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>,
+ Requires<[IsARM, HasV8, HasCRC]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-28} = 0b1110;
+ let Inst{27-23} = 0b00010;
+ let Inst{22-21} = sz;
+ let Inst{20} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{9} = C;
+ let Inst{8} = 0;
+ let Inst{7-4} = 0b0100;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1101;
+}
+
+def CRC32B : AI_crc32<0, 0b00, "b", int_arm_crc32b>;
+def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def CRC32H : AI_crc32<0, 0b01, "h", int_arm_crc32h>;
+def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>;
+def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>;
+
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
@@ -4139,56 +4187,65 @@ def BCCZi64 : PseudoInst<(outs),
// Conditional moves
-// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
let isCommutable = 1, isSelect = 1 in
-def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, GPR:$Rm, cmovpred:$p),
4, IIC_iCMOVr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+ [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg_imm:$shift, pred:$p),
- 4, IIC_iCMOVsr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
- imm:$cc, CCR:$ccr))*/]>,
+ (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p),
+ 4, IIC_iCMOVsr,
+ [(set GPR:$Rd,
+ (ARMcmov GPR:$false, so_reg_imm:$shift,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg_reg:$shift, pred:$p),
+ (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p),
4, IIC_iCMOVsr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
- imm:$cc, CCR:$ccr))*/]>,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
- 4, IIC_iMOVi,
- []>,
+def MOVCCi16
+ : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
Sched<[WriteALU]>;
let isMoveImm = 1 in
def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, pred:$p),
+ (ins GPR:$false, so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
// Two instruction predicate mov immediate.
let isMoveImm = 1 in
-def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, i32imm:$src, pred:$p),
- 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+def MOVCCi32imm
+ : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
let isMoveImm = 1 in
def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, pred:$p),
+ (ins GPR:$false, so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
} // neverHasSideEffects
@@ -4221,7 +4278,7 @@ def instsyncb_opt : Operand<i32> {
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
- "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff05;
@@ -4230,7 +4287,7 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
}
def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
- "dsb", "\t$opt", []>,
+ "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff04;
@@ -4246,124 +4303,219 @@ def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
let Inst{3-0} = opt;
}
+let usesCustomInserter = 1, Defs = [CPSR] in {
+
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in
-def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
+ def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-let usesCustomInserter = 1 in {
- let Defs = [CPSR] in {
+// Atomic pseudo-insts which will be lowered to ldrex/strex loops.
+// (64-bit pseudos use a hand-written selection code).
+ let mayLoad = 1, mayStore = 1 in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
-
- def ATOMIC_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
- def ATOMIC_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
-
- def ATOMIC_CMP_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
- def ATOMIC_CMP_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_CMP_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
-}
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_ADD_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_SUB_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_AND_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_OR_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_XOR_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_NAND_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_MIN_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_MAX_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_UMIN_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_UMAX_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+ GPR:$set1, GPR:$set2, i32imm:$ordering),
+ NoItinerary, []>;
+ }
+ let mayLoad = 1 in
+ def ATOMIC_LOAD_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, i32imm:$ordering),
+ NoItinerary, []>;
+ let mayStore = 1 in
+ def ATOMIC_STORE_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
}
let usesCustomInserter = 1 in {
@@ -4402,8 +4554,7 @@ def strex_4 : PatFrag<(ops node:$val, node:$ptr),
let mayLoad = 1 in {
def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
- NoItinerary,
- "ldrexb", "\t$Rt, $addr",
+ NoItinerary, "ldrexb", "\t$Rt, $addr",
[(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "ldrexh", "\t$Rt, $addr",
@@ -4412,10 +4563,22 @@ def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "ldrex", "\t$Rt, $addr",
[(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>;
let hasExtraDefRegAllocReq = 1 in
-def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegLoad";
}
+
+def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaexb", "\t$Rt, $addr", []>;
+def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaexh", "\t$Rt, $addr", []>;
+def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaex", "\t$Rt, $addr", []>;
+let hasExtraDefRegAllocReq = 1 in
+def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldaexd", "\t$Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
@@ -4434,8 +4597,22 @@ def STREXD : AIstrex<0b01, (outs GPR:$Rd),
NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
+def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexb", "\t$Rd, $Rt, $addr",
+ []>;
+def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexh", "\t$Rd, $Rt, $addr",
+ []>;
+def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlex", "\t$Rd, $Rt, $addr",
+ []>;
+let hasExtraSrcRegAllocReq = 1 in
+def STLEXD : AIstlex<0b01, (outs GPR:$Rd),
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
}
-
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
[(int_arm_clrex)]>,
@@ -4452,12 +4629,43 @@ def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
(STREXH GPR:$Rt, addr_offset_none:$addr)>;
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Acquire || Ordering == SequentiallyConsistent;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Release || Ordering == SequentiallyConsistent;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+
+let AddedComplexity = 8 in {
+ def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>;
+}
+
// SWP/SWPB are deprecated in V6/V7.
let mayLoad = 1, mayStore = 1 in {
def SWP : AIswp<0, (outs GPRnopc:$Rt),
- (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>;
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
+ Requires<[PreV8]>;
def SWPB: AIswp<1, (outs GPRnopc:$Rt),
- (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>;
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
+ Requires<[PreV8]>;
}
//===----------------------------------------------------------------------===//
@@ -4468,7 +4676,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]> {
bits<4> opc1;
bits<4> CRn;
bits<4> CRd;
@@ -4489,7 +4698,8 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]> {
let Inst{31-28} = 0b1111;
bits<4> opc1;
bits<4> CRn;
@@ -4667,10 +4877,10 @@ defm LDC : LdStCop <1, 0, "ldc">;
defm LDCL : LdStCop <1, 1, "ldcl">;
defm STC : LdStCop <0, 0, "stc">;
defm STCL : LdStCop <0, 1, "stcl">;
-defm LDC2 : LdSt2Cop<1, 0, "ldc2">;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l">;
-defm STC2 : LdSt2Cop<0, 0, "stc2">;
-defm STC2L : LdSt2Cop<0, 1, "stc2l">;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">, Requires<[PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">, Requires<[PreV8]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2">, Requires<[PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">, Requires<[PreV8]>;
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register.
@@ -4703,7 +4913,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]>,
+ ComplexDeprecationPredicate<"MCR">;
def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -4746,14 +4957,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]>;
def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
- imm0_7:$opc2), []>;
+ imm0_7:$opc2), []>,
+ Requires<[PreV8]>;
def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
@@ -4790,7 +5003,8 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
: ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary,
- !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
+ Requires<[PreV8]> {
let Inst{31-28} = 0b1111;
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -5341,4 +5555,5 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
// 'it' blocks in ARM mode just validate the predicates. The IT itself
// is discarded.
-def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
+ ComplexDeprecationPredicate<"IT">;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index af4f4d1..43bd4c2 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -2355,17 +2355,36 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
+class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Same as N2VQIntXnp but with Vd as a src register.
+class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -2534,7 +2553,7 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
ResTy, OpTy, IntOp, Commutable,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
@@ -2592,6 +2611,19 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+// Same as N3VQIntnp but with Vd as a src register.
+class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
+ Dt, ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
+ (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
@@ -2842,6 +2874,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
+
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
@@ -2897,6 +2930,17 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
+
+// Same as above, but not predicated.
+class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+ ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
@@ -3973,12 +4017,18 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
-defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
- int_arm_neon_vaddhn, 1>;
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
int_arm_neon_vraddhn, 1>;
+def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
@@ -4016,6 +4066,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfd DPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfq QPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+
+
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
@@ -4061,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", NEONvmulls, 1>;
-defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", NEONvmullu, 1>;
-def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
- v8i16, v8i8, int_arm_neon_vmullp, 1>;
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "NEONData" in {
+ defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+ defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+ def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+ def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
+ "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
+ Requires<[HasV8, HasCrypto]>;
+}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
@@ -4133,8 +4200,27 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
- "vqdmlal", "s", int_arm_neon_vqdmlal>;
-defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+ "vqdmlal", "s", null_frag>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -4190,25 +4276,44 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
- "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+ "vqdmlsl", "s", null_frag>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4256,12 +4361,18 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
-defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
- int_arm_neon_vsubhn, 0>;
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
int_arm_neon_vrsubhn, 0>;
+def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
// Vector Comparisons.
// VCEQ : Vector Compare Equal
@@ -5047,10 +5158,10 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
// Vector Move Operations.
// VMOV : Vector Move (Register)
-def : InstAlias<"vmov${p} $Vd, $Vm",
- (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
-def : InstAlias<"vmov${p} $Vd, $Vm",
- (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
@@ -5461,6 +5572,25 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
+ (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
+ (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
+ (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
+ (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
+ (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
+ (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
+ (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
+ (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
+
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
IIC_VUNAQ, "vcvt", "f16.f32",
@@ -5725,9 +5855,9 @@ multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
}
}
- def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
(!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
- def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
(!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}
@@ -5738,6 +5868,49 @@ defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
+// Cryptography instructions
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "v8Crypto" in {
+ class AES<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
+ : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
+ Requires<[HasV8, HasCrypto]>;
+}
+
+def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
+def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
+def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
+def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
+
+def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
+def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
+def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
+def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
+def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
+def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
+def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
+def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
+def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
+def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
+
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index e7218c6..af5ef53 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -269,25 +269,26 @@ class T1SystemEncoding<bits<8> opc>
let Inst{7-0} = opc;
}
-def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
- T1SystemEncoding<0x00>, // A8.6.110
- Requires<[IsThumb2]>;
-
-def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
- T1SystemEncoding<0x10>, // A8.6.410
- Requires<[IsThumb2]>;
-
-def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
- T1SystemEncoding<0x20>, // A8.6.408
- Requires<[IsThumb2]>;
+def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>,
+ T1SystemEncoding<0x00>,
+ Requires<[IsThumb, HasV6M]> {
+ bits<4> imm;
+ let Inst{7-4} = imm;
+}
-def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
- T1SystemEncoding<0x30>, // A8.6.409
- Requires<[IsThumb2]>;
+class tHintAlias<string Asm, dag Result> : tInstAlias<Asm, Result> {
+ let Predicates = [IsThumb, HasV6M];
+}
-def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
- T1SystemEncoding<0x40>, // A8.6.157
- Requires<[IsThumb2]>;
+def : tHintAlias<"nop$p", (tHINT 0, pred:$p)>; // A8.6.110
+def : tHintAlias<"yield$p", (tHINT 1, pred:$p)>; // A8.6.410
+def : tHintAlias<"wfe$p", (tHINT 2, pred:$p)>; // A8.6.408
+def : tHintAlias<"wfi$p", (tHINT 3, pred:$p)>; // A8.6.409
+def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157
+def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> {
+ let Predicates = [IsThumb2, HasV8];
+}
+def : T2Pat<(int_arm_sevl), (tHINT 5)>;
// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
@@ -300,8 +301,15 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
let Inst{7-0} = val;
}
+def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val",
+ []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> {
+ let Inst{9-6} = 0b1010;
+ bits<6> val;
+ let Inst{5-0} = val;
+}
+
def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
- []>, T1Encoding<0b101101> {
+ []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> {
bits<1> end;
// A8.6.156
let Inst{9-5} = 0b10010;
@@ -491,7 +499,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
T1Encoding<{1,1,1,0,0,?}>, Sched<[WriteBr]> {
bits<11> target;
let Inst{10-0} = target;
- }
+ let AsmMatchConverter = "cvtThumbBranches";
+ }
// Far jump
// Just a pseudo for a tBL instruction. Needed to let regalloc know about
@@ -521,6 +530,7 @@ let isBranch = 1, isTerminator = 1 in
bits<8> target;
let Inst{11-8} = p;
let Inst{7-0} = target;
+ let AsmMatchConverter = "cvtThumbBranches";
}
@@ -660,9 +670,6 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
let Inst{7-0} = addr;
}
-def : tInstAlias<"ldr${p}.n $Rt, $addr",
- (tLDRpci tGPR:$Rt, t_addrmode_pc:$addr, pred:$p), 0>;
-
// A8.6.194 & A8.6.192
defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
t_addrmode_is4, AddrModeT1_4,
@@ -1205,9 +1212,9 @@ def tUXTH : // A8.6.264
// Expanded after instruction selection into a branch sequence.
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
- PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
- NoItinerary,
- [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p),
+ NoItinerary,
+ [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>;
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 84086a5..48acffd 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -465,6 +465,18 @@ class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
let Inst{3-0} = Rm;
}
+class T2ThreeRegNoP<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2sI<oops, iops, itin, opc, asm, pattern> {
@@ -1396,6 +1408,32 @@ def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary,
+ opc, asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-24} = 0b000;
+ let Inst{23-20} = bits23_20;
+ let Inst{11-6} = 0b111110;
+ let Inst{5-4} = bit54;
+ let Inst{3-0} = 0b1111;
+
+ // Encode instruction operands
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
+
// Store
defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
BinOpFrag<(store node:$LHS, node:$RHS)>>;
@@ -1539,6 +1577,31 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
"$addr.base = $wb", []>;
+class T2Istrrel<bits<2> bit54, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc,
+ asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-6} = 0b111110;
+ let Inst{5-4} = bit54;
+ let Inst{3-0} = 0b1111;
+
+ // Encode instruction operands
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+def t2STL : T2Istrrel<0b10, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stl", "\t$Rt, $addr", []>;
+def t2STLB : T2Istrrel<0b00, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stlb", "\t$Rt, $addr", []>;
+def t2STLH : T2Istrrel<0b01, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stlh", "\t$Rt, $addr", []>;
+
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
// data/instruction access.
// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
@@ -1855,6 +1918,9 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
let DecoderMethod = "DecodeT2MOVTWInstruction";
}
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>;
+
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
@@ -2950,6 +3016,34 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
Requires<[HasT2ExtractPack, IsThumb2]>;
//===----------------------------------------------------------------------===//
+// CRC32 Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W} 0x04C11DB7
+// + CRC32C{B,H,W} 0x1EDC6F41
+//
+
+class T2I_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+ : T2ThreeRegNoP<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary,
+ !strconcat("crc32", suffix, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (builtin rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasV8, HasCRC]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b010110;
+ let Inst{20} = C;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = sz;
+}
+
+def t2CRC32B : T2I_crc32<0, 0b00, "b", int_arm_crc32b>;
+def t2CRC32CB : T2I_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def t2CRC32H : T2I_crc32<0, 0b01, "h", int_arm_crc32h>;
+def t2CRC32CH : T2I_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def t2CRC32W : T2I_crc32<0, 0b10, "w", int_arm_crc32w>;
+def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>;
+
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
@@ -3029,93 +3123,67 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
-// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, pred:$p),
+ (ins rGPR:$false, rGPR:$Rm, cmovpred:$p),
4, IIC_iCMOVr,
- [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">,
- Sched<[WriteALU]>;
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+def t2MOVCCi
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
-[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
-// FIXME: Pseudo-ize these. For now, just mark codegen only.
let isCodeGenOnly = 1 in {
let isMoveImm = 1 in
-def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm),
- IIC_iCMOVi,
- "movw", "\t$Rd, $imm", []>,
- RegConstraint<"$false = $Rd">, Sched<[WriteALU]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 1;
- let Inst{24-21} = 0b0010;
- let Inst{20} = 0; // The S bit.
- let Inst{15} = 0;
-
- bits<4> Rd;
- bits<16> imm;
-
- let Inst{11-8} = Rd;
- let Inst{19-16} = imm{15-12};
- let Inst{26} = imm{11};
- let Inst{14-12} = imm{10-8};
- let Inst{7-0} = imm{7-0};
-}
+def t2MOVCCi16
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
- (ins rGPR:$false, i32imm:$src, pred:$p),
- IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+def t2MVNCCi
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd,
+ (ARMcmov rGPR:$false, t2_so_imm_not:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+class MOVCCShPseudo<SDPatternOperator opnode, Operand ty>
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVsi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,
+ (opnode rGPR:$Rm, (i32 ty:$imm)),
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+def t2MOVCClsl : MOVCCShPseudo<shl, imm0_31>;
+def t2MOVCClsr : MOVCCShPseudo<srl, imm_sr>;
+def t2MOVCCasr : MOVCCShPseudo<sra, imm_sr>;
+def t2MOVCCror : MOVCCShPseudo<rotr, imm0_31>;
let isMoveImm = 1 in
-def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
- IIC_iCMOVi, "mvn", "\t$Rd, $imm",
-[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
- imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, Sched<[WriteALU]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = 0b0011;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15} = 0;
-}
-
-class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern>, Sched<[WriteALU]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = 0b0010;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1111; // Rn
- let Inst{5-4} = opcod; // Shift type.
-}
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
+def t2MOVCCi32imm
+ : t2PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $dst">;
} // isCodeGenOnly = 1
} // neverHasSideEffects
@@ -3127,7 +3195,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
- "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
Requires<[HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f5;
@@ -3136,7 +3204,8 @@ def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
}
def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
- "dsb", "\t$opt", []>, Requires<[HasDB]> {
+ "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
+ Requires<[HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f4;
let Inst{3-0} = opt;
@@ -3149,15 +3218,14 @@ def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary,
let Inst{3-0} = opt;
}
-class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
: Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{11-8} = rt2;
- let Inst{7-6} = 0b01;
- let Inst{5-4} = opcod;
+ let Inst{7-4} = opcod;
let Inst{3-0} = 0b1111;
bits<4> addr;
@@ -3165,15 +3233,14 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
let Inst{19-16} = addr;
let Inst{15-12} = Rt;
}
-class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+class T2I_strex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
: Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001100;
let Inst{11-8} = rt2;
- let Inst{7-6} = 0b01;
- let Inst{5-4} = opcod;
+ let Inst{7-4} = opcod;
bits<4> Rd;
bits<4> addr;
@@ -3184,11 +3251,11 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexb", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexh", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
@@ -3206,7 +3273,7 @@ def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
let Inst{7-0} = addr{7-0};
}
let hasExtraDefRegAllocReq = 1 in
-def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
+def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
(ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
@@ -3214,16 +3281,48 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexb", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexh", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaex", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = 0b11101111;
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+
+ let Inst{7} = 1;
+}
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexb", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd, (strex_1 rGPR:$Rt,
addr_offset_none:$addr))]>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexh", "\t$Rd, $Rt, $addr", "",
@@ -3247,7 +3346,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
let Inst{7-0} = addr{7-0};
}
let hasExtraSrcRegAllocReq = 1 in
-def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
@@ -3255,6 +3354,42 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexb", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+
+def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexh", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+
+def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlex", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b11111110;
+ let Inst{3-0} = Rd;
+}
+let hasExtraSrcRegAllocReq = 1 in
+def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
}
def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>,
@@ -3336,13 +3471,14 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{12} = 1;
bits<24> target;
- let Inst{26} = target{19};
- let Inst{11} = target{18};
- let Inst{13} = target{17};
+ let Inst{26} = target{23};
+ let Inst{13} = target{22};
+ let Inst{11} = target{21};
let Inst{25-16} = target{20-11};
let Inst{10-0} = target{10-0};
let DecoderMethod = "DecodeT2BInstruction";
-}
+ let AsmMatchConverter = "cvtThumbBranches";
+}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
@@ -3410,6 +3546,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let Inst{10-0} = target{11-1};
let DecoderMethod = "DecodeThumb2BCCInstruction";
+ let AsmMatchConverter = "cvtThumbBranches";
}
// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so
@@ -3428,7 +3565,8 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, 2, IIC_iALUx,
- "it$mask\t$cc", "", []> {
+ "it$mask\t$cc", "", []>,
+ ComplexDeprecationPredicate<"IT"> {
// 16-bit instruction.
let Inst{31-16} = 0x0000;
let Inst{15-8} = 0b10111111;
@@ -3502,27 +3640,34 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
let M = 1 in
def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
- "$imod.w\t$iflags, $mode">;
+ "$imod\t$iflags, $mode">;
let mode = 0, M = 0 in
def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
"$imod.w\t$iflags">;
let imod = 0, iflags = 0, M = 1 in
def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">;
+def : t2InstAlias<"cps$imod.w $iflags, $mode",
+ (t2CPS3p imod_op:$imod, iflags_op:$iflags, i32imm:$mode), 0>;
+def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>;
+
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> {
- bits<3> imm;
+def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",[]> {
+ bits<8> imm;
let Inst{31-3} = 0b11110011101011111000000000000;
- let Inst{2-0} = imm;
+ let Inst{7-0} = imm;
}
-def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>;
+def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p)>;
def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
+def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> {
+ let Predicates = [IsThumb2, HasV8];
+}
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
bits<4> opt;
@@ -3545,6 +3690,20 @@ def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
let Inst{19-16} = opt;
}
+class T2DCPS<bits<2> opt, string opc>
+ : T2I<(outs), (ins), NoItinerary, opc, "", []>, Requires<[IsThumb2, HasV8]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111000;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-2} = 0b0000000000;
+ let Inst{1-0} = opt;
+}
+
+def t2DCPS1 : T2DCPS<0b01, "dcps1">;
+def t2DCPS2 : T2DCPS<0b10, "dcps2">;
+def t2DCPS3 : T2DCPS<0b11, "dcps3">;
+
class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
@@ -3600,9 +3759,12 @@ def t2RFEIA : T2RFE<0b111010011001,
[/* For disassembly only; pattern left blank */]>;
// B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction.
-let Defs = [PC], Uses = [LR] in
+// Exception return instruction is "subs pc, lr, #imm".
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
- "subs", "\tpc, lr, $imm", []>, Requires<[IsThumb2]> {
+ "subs", "\tpc, lr, $imm",
+ [(ARMintretflag imm0_255:$imm)]>,
+ Requires<[IsThumb2]> {
let Inst{31-8} = 0b111100111101111010001111;
bits<8> imm;
@@ -3752,10 +3914,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
-defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">;
-defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">;
-defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">;
-defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>;
//===----------------------------------------------------------------------===//
@@ -3767,7 +3929,7 @@ defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
//
// A/R class can only move from CPSR or SPSR.
def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
- []>, Requires<[IsThumb2,IsARClass]> {
+ []>, Requires<[IsThumb2,IsNotMClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
@@ -3777,7 +3939,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
- []>, Requires<[IsThumb2,IsARClass]> {
+ []>, Requires<[IsThumb2,IsNotMClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111111111000;
let Inst{11-8} = Rd;
@@ -3810,7 +3972,7 @@ def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
// the mask with the fields to be accessed in the special register.
def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
NoItinerary, "msr", "\t$mask, $Rn", []>,
- Requires<[IsThumb2,IsARClass]> {
+ Requires<[IsThumb2,IsNotMClass]> {
bits<5> mask;
bits<4> Rn;
let Inst{31-21} = 0b11110011100;
@@ -3892,7 +4054,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]>,
+ ComplexDeprecationPredicate<"MCR">;
def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -3900,7 +4063,9 @@ def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]> {
+ let Predicates = [IsThumb2, PreV8];
+}
def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -3915,7 +4080,9 @@ def : t2InstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
(outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
- c_imm:$CRm, imm0_7:$opc2), []>;
+ c_imm:$CRm, imm0_7:$opc2), []> {
+ let Predicates = [IsThumb2, PreV8];
+}
def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -3933,17 +4100,22 @@ def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
imm:$CRm)]>;
def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
- GPR:$Rt2, imm:$CRm)]>;
+ GPR:$Rt2, imm:$CRm)]> {
+ let Predicates = [IsThumb2, PreV8];
+}
+
/* from coprocessor to ARM core register */
def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
-def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1> {
+ let Predicates = [IsThumb2, PreV8];
+}
//===----------------------------------------------------------------------===//
// Other Coprocessor Instructions.
//
-def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
@@ -3964,6 +4136,8 @@ def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{15-12} = CRd;
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
+
+ let Predicates = [IsThumb2, PreV8];
}
def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
@@ -3987,6 +4161,8 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{15-12} = CRd;
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
+
+ let Predicates = [IsThumb2, PreV8];
}
@@ -4060,6 +4236,15 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+let AddedComplexity = 8 in {
+ def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
+}
+
//===----------------------------------------------------------------------===//
// Assembler aliases
@@ -4081,7 +4266,8 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
// Aliases for ADD without the ".w" optional width specifier.
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (t2ADDri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
(t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
@@ -4156,9 +4342,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
(t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
// Memory barriers
-def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>;
-def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>;
-def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[HasDB]>;
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
@@ -4185,7 +4371,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr",
(t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
def : t2InstAlias<"ldr${p} $Rt, $addr",
- (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+ (t2LDRpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrb${p} $Rt, $addr",
(t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrh${p} $Rt, $addr",
@@ -4347,16 +4533,16 @@ def : t2InstAlias<"mvn${p} $Rd, $imm",
(t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
// Same for AND <--> BIC
def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
- (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
- (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
- (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"and${s}${p} $Rdn, $imm",
- (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, t2_so_imm_neg" -> sub
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
@@ -4398,7 +4584,7 @@ def : t2InstAlias<"adr${p} $Rd, $addr",
// LDR(literal) w/ alternate [pc, #imm] syntax.
def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr",
- (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+ (ins GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr",
(ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr",
@@ -4409,7 +4595,7 @@ def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr",
(ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
// Version w/ the .w suffix.
def : t2InstAlias<"ldr${p}.w $Rt, $addr",
- (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>;
+ (t2LDRpcrel GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>;
def : t2InstAlias<"ldrb${p}.w $Rt, $addr",
(t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def : t2InstAlias<"ldrh${p}.w $Rt, $addr",
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index f9cfa15..a8cdc5c 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -333,45 +333,52 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
let D = VFPNeonA8Domain;
}
-multiclass vsel_inst<string op, bits<2> opc> {
- let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+multiclass vsel_inst<string op, bits<2> opc, int CC> {
+ let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+ Uses = [CPSR], AddedComplexity = 4 in {
def S : ASbInp<0b11100, opc, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
- []>, Requires<[HasV8FP]>;
+ [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
+ Requires<[HasFPARMv8]>;
def D : ADbInp<0b11100, opc, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
- []>, Requires<[HasV8FP]>;
+ [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
}
}
-defm VSELGT : vsel_inst<"gt", 0b11>;
-defm VSELGE : vsel_inst<"ge", 0b10>;
-defm VSELEQ : vsel_inst<"eq", 0b00>;
-defm VSELVS : vsel_inst<"vs", 0b01>;
+// The CC constants here match ARMCC::CondCodes.
+defm VSELGT : vsel_inst<"gt", 0b11, 12>;
+defm VSELGE : vsel_inst<"ge", 0b10, 10>;
+defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
+defm VSELVS : vsel_inst<"vs", 0b01, 6>;
-multiclass vmaxmin_inst<string op, bit opc> {
+multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
def S : ASbInp<0b11101, 0b00, opc,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"),
- []>, Requires<[HasV8FP]>;
+ [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>,
+ Requires<[HasFPARMv8]>;
def D : ADbInp<0b11101, 0b00, opc,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"),
- []>, Requires<[HasV8FP]>;
+ [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
}
}
-defm VMAXNM : vmaxmin_inst<"vmaxnm", 0>;
-defm VMINNM : vmaxmin_inst<"vminnm", 1>;
+defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>;
+defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>;
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
- (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULD DPR:$a, DPR:$b)>,
+ Requires<[NoHonorSignDependentRounding,HasDPVFP]>;
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
@@ -502,6 +509,8 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
let Inst{11-8} = 0b1011;
let Inst{7-6} = 0b11;
let Inst{4} = 0;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Between half, single and double-precision. For disassembly only.
@@ -532,7 +541,7 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sm;
@@ -544,7 +553,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -559,7 +568,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sm;
@@ -571,7 +580,7 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -588,21 +597,21 @@ multiclass vcvt_inst<string opc, bits<2> rm> {
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
let Inst{17-16} = rm;
@@ -616,7 +625,7 @@ multiclass vcvt_inst<string opc, bits<2> rm> {
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
let Inst{17-16} = rm;
@@ -652,17 +661,24 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2> {
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8]> {
let Inst{7} = op2;
let Inst{16} = op;
}
def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{7} = op2;
let Inst{16} = op;
}
+
+ def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"),
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>,
+ Requires<[HasFPARMv8]>;
+ def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>,
+ Requires<[HasFPARMv8,HasDPVFP]>;
}
defm VRINTZ : vrint_inst_zrx<"z", 0, 1>;
@@ -674,21 +690,23 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm> {
def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{17-16} = rm;
}
}
def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"),
- (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>;
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>,
+ Requires<[HasFPARMv8]>;
def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"),
- (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>,
+ Requires<[HasFPARMv8,HasDPVFP]>;
}
defm VRINTA : vrint_inst_anpm<"a", 0b00>;
@@ -885,6 +903,8 @@ class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Sm{0};
let Inst{15-12} = Dd{3-0};
let Inst{22} = Dd{4};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -956,6 +976,8 @@ class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Dm{4};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1077,6 +1099,8 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{4};
let Inst{15-12} = dst{3-0};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
@@ -1189,7 +1213,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1205,7 +1229,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
@@ -1216,7 +1240,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1232,7 +1256,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1243,7 +1267,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1259,7 +1283,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1270,7 +1294,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1285,7 +1309,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1299,7 +1323,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1314,7 +1338,7 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1323,7 +1347,7 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
// (fma x, y, z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1334,7 +1358,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1349,7 +1373,7 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1358,14 +1382,14 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
// (fma (fneg x), y, z) -> (vfms z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fma x, (fneg y), z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1376,7 +1400,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1391,7 +1415,7 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1400,14 +1424,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
// (fneg (fma x, y, z)) -> (vfnma z, x, y)
def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1418,7 +1442,7 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1432,7 +1456,7 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1442,21 +1466,21 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
// (fma x, y, (fneg z)) -> (vfnms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fneg (fma x, (fneg y), z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1466,15 +1490,17 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
//
let neverHasSideEffects = 1 in {
-def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
- 4, IIC_fpUNA64,
- [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
- RegConstraint<"$Dn = $Dd">;
-
-def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
- 4, IIC_fpUNA32,
- [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
- RegConstraint<"$Sn = $Sd">;
+def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
+ IIC_fpUNA64,
+ [(set (f64 DPR:$Dd),
+ (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
+ RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+
+def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
+ IIC_fpUNA32,
+ [(set (f32 SPR:$Sd),
+ (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
+ RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -1520,6 +1546,8 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, mvfr0", []>;
def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, mvfr1", []>;
+ def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr2", []>, Requires<[HasFPARMv8]>;
def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, fpinst", []>;
def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins),
@@ -1573,7 +1601,8 @@ let isReMaterializable = 1 in {
def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
"vmov", ".f64\t$Dd, $imm",
- [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>,
+ Requires<[HasVFP3,HasDPVFP]> {
bits<5> Dd;
bits<8> imm;
@@ -1655,23 +1684,23 @@ def : VFP2MnemonicAlias<"fmrx", "vmrs">;
def : VFP2MnemonicAlias<"fmxr", "vmsr">;
// Be friendly and accept the old form of zero-compare
-def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
(VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
- (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
(VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
- (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
// No need for the size suffix on VSQRT. It's implied by the register classes.
def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
// VLDR/VSTR accept an optional type suffix.
def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 1803a8a..61596d5 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -90,6 +90,10 @@ namespace {
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
+ void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
+ const MemOpQueue &MemOps, unsigned DefReg,
+ unsigned RangeBegin, unsigned RangeEnd);
+
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@@ -360,6 +364,62 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return true;
}
+/// \brief Find all instructions using a given imp-def within a range.
+///
+/// We are trying to combine a range of instructions, one of which (located at
+/// position RangeBegin) implicitly defines a register. The final LDM/STM will
+/// be placed at RangeEnd, and so any uses of this definition between RangeStart
+/// and RangeEnd must be modified to use an undefined value.
+///
+/// The live range continues until we find a second definition or one of the
+/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
+/// we must consider all uses and decide which are relevant in a second pass.
+void ARMLoadStoreOpt::findUsesOfImpDef(
+ SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
+ unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
+ std::map<unsigned, MachineOperand *> Uses;
+ unsigned LastLivePos = RangeEnd;
+
+ // First we find all uses of this register with Position between RangeBegin
+ // and RangeEnd; any or all of these could be uses of a definition at
+ // RangeBegin. We also record the latest position a definition at RangeBegin
+ // would be considered live.
+ for (unsigned i = 0; i < MemOps.size(); ++i) {
+ MachineInstr &MI = *MemOps[i].MBBI;
+ unsigned MIPosition = MemOps[i].Position;
+ if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
+ continue;
+
+ // If this instruction defines the register, then any later use will be of
+ // that definition rather than ours.
+ if (MI.definesRegister(DefReg))
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
+ if (!UseOp)
+ continue;
+
+ // If this instruction kills the register then (assuming liveness is
+ // correct when we start) we don't need to think about anything after here.
+ if (UseOp->isKill())
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ Uses[MIPosition] = UseOp;
+ }
+
+ // Now we traverse the list of all uses, and append the ones that actually use
+ // our definition to the requested list.
+ for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
+ E = Uses.end();
+ I != E; ++I) {
+ // List is sorted by position so once we've found one out of range there
+ // will be no more to consider.
+ if (I->first > LastLivePos)
+ break;
+ UsesOfImpDefs.push_back(I->second);
+ }
+}
+
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
@@ -392,6 +452,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 8> ImpDefs;
+ SmallVector<MachineOperand *, 8> UsesOfImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
@@ -406,6 +467,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
unsigned DefReg = MO->getReg();
if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
ImpDefs.push_back(DefReg);
+
+ // There may be other uses of the definition between this instruction and
+ // the eventual LDM/STM position. These should be marked undef if the
+ // merge takes place.
+ findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
+ insertPos);
}
}
@@ -418,6 +485,16 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
// Merge succeeded, update records.
Merges.push_back(prior(Loc));
+
+ // In gathering loads together, we may have moved the imp-def of a register
+ // past one of its uses. This is OK, since we know better than the rest of
+ // LLVM what's OK with ARM loads and stores; but we still have to adjust the
+ // affected uses.
+ for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
+ E = UsesOfImpDefs.end();
+ I != E; ++I)
+ (*I)->setIsUndef();
+
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any memops that come before insertPos.
if (Regs[i-memOpsBegin].second) {
@@ -489,7 +566,10 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
((isNotVFP && RegNum > PRegNum) ||
- ((Count < Limit) && RegNum == PRegNum+1))) {
+ ((Count < Limit) && RegNum == PRegNum+1)) &&
+ // On Swift we don't want vldm/vstm to start with an odd register number
+ // because unaligned Q-register vldm/vstm need more uops.
+ (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
Offset += Size;
PRegNum = RegNum;
++Count;
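For context, a minimal sketch of the Swift-specific guard added in this hunk; the helper name canGrowRun and its boolean parameters are hypothetical stand-ins for the subtarget query and local variables used in the condition above.

// Hypothetical restatement of the new condition (not part of the patch):
// on Swift, a run of VFP registers may only keep growing if it did not
// start at an odd register number, since, as the comment above notes,
// unaligned Q-register vldm/vstm need more uops.
static bool canGrowRun(bool IsSwift, bool IsNotVFP, unsigned Count,
                       unsigned PRegNum) {
  if (!IsSwift || IsNotVFP)
    return true;               // restriction only applies to VFP regs on Swift
  if (Count != 1)
    return true;               // only checked when the run has one register
  return (PRegNum & 0x1) == 0; // first register must be even (d0, d2, ...)
}

In the patch itself this check is folded directly into the existing if condition rather than split out into a helper.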
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index b641483..e12c9c6 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -82,7 +82,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_ExternalSymbol:
MCOp = GetSymbolRef(MO,
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index d9ec4fd..010edf3 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -84,12 +84,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
unsigned GPRCS2Size;
unsigned DPRCSSize;
- /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
- /// which belong to these spill areas.
- BitVector GPRCS1Frames;
- BitVector GPRCS2Frames;
- BitVector DPRCSFrames;
-
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
/// the aligned portion of the stack frame. This is always a contiguous
/// sequence of D-registers starting from d8.
@@ -128,7 +122,6 @@ public:
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -141,7 +134,6 @@ public:
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -190,59 +182,6 @@ public:
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
- bool isGPRCalleeSavedArea1Frame(int fi) const {
- if (fi < 0 || fi >= (int)GPRCS1Frames.size())
- return false;
- return GPRCS1Frames[fi];
- }
- bool isGPRCalleeSavedArea2Frame(int fi) const {
- if (fi < 0 || fi >= (int)GPRCS2Frames.size())
- return false;
- return GPRCS2Frames[fi];
- }
- bool isDPRCalleeSavedAreaFrame(int fi) const {
- if (fi < 0 || fi >= (int)DPRCSFrames.size())
- return false;
- return DPRCSFrames[fi];
- }
-
- void addGPRCalleeSavedArea1Frame(int fi) {
- if (fi >= 0) {
- int Size = GPRCS1Frames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- GPRCS1Frames.resize(Size);
- }
- GPRCS1Frames[fi] = true;
- }
- }
- void addGPRCalleeSavedArea2Frame(int fi) {
- if (fi >= 0) {
- int Size = GPRCS2Frames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- GPRCS2Frames.resize(Size);
- }
- GPRCS2Frames[fi] = true;
- }
- }
- void addDPRCalleeSavedAreaFrame(int fi) {
- if (fi >= 0) {
- int Size = DPRCSFrames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- DPRCSFrames.resize(Size);
- }
- DPRCSFrames[fi] = true;
- }
- }
-
unsigned createJumpTableUId() {
return JumpTableUId++;
}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index bb7d358..d045761 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -172,6 +172,7 @@ def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
+def MVFR2 : ARMReg<5, "mvfr2">;
def MVFR1 : ARMReg<6, "mvfr1">;
def MVFR0 : ARMReg<7, "mvfr0">;
def FPEXC : ARMReg<8, "fpexc">;
@@ -251,7 +252,7 @@ def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
// to the saved value before the tail call, which would clobber a call address.
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
// this class and the preceding one(!) This is what we want.
-def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> {
let AltOrders = [(and tcGPR, tGPR)];
let AltOrderSelect = [{
return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 74ee50b..603e775 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1879,6 +1879,10 @@ def CortexA9Itineraries : ProcessorItineraries<
// The following definitions describe the simpler per-operand machine model.
// This works with MachineScheduler and will eventually replace itineraries.
+class A9WriteLMOpsListType<list<WriteSequence> writes> {
+ list <WriteSequence> Writes = writes;
+ SchedMachineModel SchedModel = ?;
+}
// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
@@ -2011,7 +2015,7 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
// Define a predicate to select the LDM based on number of memory addresses.
def A9LMAdr#NumAddr#Pred :
- SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+ SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;
} // foreach NumAddr
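A quick arithmetic check of the revised predicate (illustrative only; the bucketing into pairs is inferred from the A9WriteLM comment below about one pair of writes per 64-bit value loaded):

// (getNumLDMAddresses(MI) + 1) / 2 rounds the address count up to pairs,
// so an LDM with 1 or 2 addresses now matches A9LMAdr1Pred, 3 or 4 match
// A9LMAdr2Pred, and so on up to 15 or 16 matching A9LMAdr8Pred.
static_assert((1 + 1) / 2 == 1 && (2 + 1) / 2 == 1, "1-2 addresses -> 1");
static_assert((3 + 1) / 2 == 2 && (4 + 1) / 2 == 2, "3-4 addresses -> 2");
static_assert((15 + 1) / 2 == 8 && (16 + 1) / 2 == 8, "15-16 addresses -> 8");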
@@ -2054,48 +2058,30 @@ def A9WriteL#NumAddr#Hi : WriteSequence<
//===----------------------------------------------------------------------===//
// LDM: Load multiple into 32-bit integer registers.
+def A9WriteLMOpsList : A9WriteLMOpsListType<
+ [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi,
+ A9WriteL8, A9WriteL8Hi]>;
+
// A9WriteLM variants expand into a pair of writes for each 64-bit
// value loaded. When the number of registers is odd, the last
// A9WriteLnHi is naturally ignored because the instruction has no
// following def operands. These variants take no issue resource, so
// they may need to be part of a WriteSequence that includes A9WriteIssue.
def A9WriteLM : SchedWriteVariant<[
- SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>,
- SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi]>,
- SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi]>,
- SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi]>,
- SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi]>,
- SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi]>,
- SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi,
- A9WriteL7, A9WriteL7Hi]>,
- SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi,
- A9WriteL7, A9WriteL7Hi,
- A9WriteL8, A9WriteL8Hi]>,
+ SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
// For unknown LDMs, define the maximum number of writes, but only
// make the first two consume resources.
SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
@@ -2177,49 +2163,39 @@ def A9WriteLMfp#NumAddr#Hi : WriteSequence<
// pair of writes for each 64-bit data loaded. When the number of
// registers is odd, the last WriteLMfpnHi is naturally ignored because
// the instruction has no following def operands.
+
+def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
+ [A9WriteLMfp1, A9WriteLMfp2, // 0-1
+ A9WriteLMfp3, A9WriteLMfp4, // 2-3
+ A9WriteLMfp5, A9WriteLMfp6, // 4-5
+ A9WriteLMfp7, A9WriteLMfp8, // 6-7
+ A9WriteLMfp1Hi, // 8-8
+ A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10
+ A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12
+ A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
+
def A9WriteLMfpPostRA : SchedWriteVariant<[
- SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>,
- SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi]>,
- SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi]>,
- SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi]>,
- SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi]>,
- SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi]>,
- SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi,
- A9WriteLMfp7, A9WriteLMfp7Hi]>,
- SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi,
- A9WriteLMfp7, A9WriteLMfp7Hi,
- A9WriteLMfp8, A9WriteLMfp8Hi]>,
+ SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
// For unknown LDMs, define the maximum number of writes, but only
- // make the first two consume resources.
- SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3Hi, A9WriteLMfp3Hi,
- A9WriteLMfp4Hi, A9WriteLMfp4Hi,
+ // make the first two consume resources. We are optimizing for the case
+ // where the operands are DPRs, and this determines the first eight
+ // types. The remaining eight types are filled to cover the case
+ // where the operands are SPRs.
+ SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
+ A9WriteLMfp3Hi, A9WriteLMfp4Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp8Hi,
A9WriteLMfp5Hi, A9WriteLMfp5Hi,
A9WriteLMfp6Hi, A9WriteLMfp6Hi,
A9WriteLMfp7Hi, A9WriteLMfp7Hi,
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index 2a41616..8d7dbc2 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1345,20 +1345,25 @@ let SchedModel = SwiftModel in {
// 4.2.20 Integer Load Signextended
def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 3;
+ let NumMicroOps = 2;
}
def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 4;
+ let NumMicroOps = 2;
}
def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
SwiftUnitP01]> {
let Latency = 4;
+ let NumMicroOps = 3;
}
def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
let Latency = 3;
+ let NumMicroOps = 2;
}
def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
- SwiftUnitP01]> {
+ SwiftUnitP01]> {
let Latency = 3;
+ let NumMicroOps = 3;
}
def SwiftWrBackOne : SchedWriteRes<[]> {
let Latency = 1;
@@ -1399,7 +1404,10 @@ let SchedModel = SwiftModel in {
def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
let Latency = Lat;
}
- def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { let Latency = Lat; }
+ def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> {
+ let Latency = Lat;
+ let NumMicroOps = 0;
+ }
}
// Predicate.
foreach NumAddr = 1-16 in {
@@ -1520,6 +1528,7 @@ let SchedModel = SwiftModel in {
// 4.2.25 Integer Store, Multiple
def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 0;
+ let NumMicroOps = 2;
}
foreach NumAddr = 1-16 in {
def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0a0f30c..8351c63 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -32,7 +32,7 @@ ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
static cl::opt<bool>
-DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
@@ -57,10 +57,28 @@ Align(cl::desc("Load/store alignment support"),
"Allow unaligned memory accesses"),
clEnumValEnd));
+enum ITMode {
+ DefaultIT,
+ RestrictedIT,
+ NoRestrictedIT
+};
+
+static cl::opt<ITMode>
+IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
+ cl::ZeroOrMore,
+ cl::values(clEnumValN(DefaultIT, "arm-default-it",
+ "Generate IT block based on arch"),
+ clEnumValN(RestrictedIT, "arm-restrict-it",
+ "Disallow deprecated IT based on ARMv8"),
+ clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
+ "Allow IT blocks based on ARMv7"),
+ clEnumValEnd));
+
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
+ , ARMProcClass(None)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
@@ -75,13 +93,14 @@ void ARMSubtarget::initializeEnvironment() {
HasV5TOps = false;
HasV5TEOps = false;
HasV6Ops = false;
+ HasV6MOps = false;
HasV6T2Ops = false;
HasV7Ops = false;
HasV8Ops = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
- HasV8FP = false;
+ HasFPARMv8 = false;
HasNEON = false;
UseNEONForSinglePrecisionFP = false;
UseMulOps = UseFusedMulOps;
@@ -90,7 +109,6 @@ void ARMSubtarget::initializeEnvironment() {
SlowFPBrcc = false;
InThumbMode = false;
HasThumb2 = false;
- IsMClass = false;
NoARM = false;
PostRAScheduler = false;
IsR9Reserved = ReserveR9;
@@ -107,9 +125,12 @@ void ARMSubtarget::initializeEnvironment() {
AvoidMOVsShifterOperand = false;
HasRAS = false;
HasMPExtension = false;
+ HasVirtualization = false;
FPOnlySP = false;
HasPerfMon = false;
HasTrustZone = false;
+ HasCrypto = false;
+ HasCRC = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
@@ -133,8 +154,13 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
- if (CPUString.empty())
- CPUString = "generic";
+ if (CPUString.empty()) {
+ if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s"))
+ // Default to the Swift CPU when targeting armv7s/thumbv7s.
+ CPUString = "swift";
+ else
+ CPUString = "generic";
+ }
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
@@ -152,7 +178,7 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify a
// ARM version or CPU and then remove this.
if (!HasV6T2Ops && hasThumb2())
- HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+ HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6MOps = HasV6T2Ops = true;
// Keep a pointer to static instruction cost data for the specified CPU.
SchedModel = getSchedModelForCPU(CPUString);
@@ -169,11 +195,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isAAPCS_ABI())
stackAlignment = 8;
- if (!isTargetIOS())
- UseMovt = hasV6T2Ops();
- else {
+ UseMovt = hasV6T2Ops() && ArmUseMOVT;
+
+ if (!isTargetIOS()) {
+ IsR9Reserved = ReserveR9;
+ } else {
IsR9Reserved = ReserveR9 | !HasV6Ops;
- UseMovt = DarwinUseMOVT && hasV6T2Ops();
SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0);
}
@@ -207,6 +234,18 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
break;
}
+ switch (IT) {
+ case DefaultIT:
+ RestrictIT = hasV8Ops() ? true : false;
+ break;
+ case RestrictedIT:
+ RestrictIT = true;
+ break;
+ case NoRestrictedIT:
+ RestrictIT = false;
+ break;
+ }
+
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
uint64_t Bits = getFeatureBits();
if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
@@ -271,12 +310,15 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
return SchedModel->MispredictPenalty;
}
+bool ARMSubtarget::hasSinCos() const {
+ return getTargetTriple().getOS() == Triple::IOS &&
+ !getTargetTriple().isOSVersionLT(7, 0);
+}
+
bool ARMSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
- CriticalPathRCs.clear();
- CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ Mode = TargetSubtargetInfo::ANTIDEP_NONE;
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index ad7f1b3..5276901 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -31,29 +31,36 @@ class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift
+ Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift, CortexA53, CortexA57
+ };
+ enum ARMProcClassEnum {
+ None, AClass, RClass, MClass
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily;
+ /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
+ ARMProcClassEnum ARMProcClass;
+
/// HasV4TOps, HasV5TOps, HasV5TEOps,
- /// HasV6Ops, HasV6T2Ops, HasV7Ops, HasV8Ops -
+ /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
bool HasV4TOps;
bool HasV5TOps;
bool HasV5TEOps;
bool HasV6Ops;
+ bool HasV6MOps;
bool HasV6T2Ops;
bool HasV7Ops;
bool HasV8Ops;
- /// HasVFPv2, HasVFPv3, HasVFPv4, HasV8FP, HasNEON - Specify what
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
bool HasVFPv2;
bool HasVFPv3;
bool HasVFPv4;
- bool HasV8FP;
+ bool HasFPARMv8;
bool HasNEON;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
@@ -82,10 +89,6 @@ protected:
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
- /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
- /// v6m, v7m for example.
- bool IsMClass;
-
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
@@ -147,6 +150,10 @@ protected:
/// extension (ARMv7 only).
bool HasMPExtension;
+ /// HasVirtualization - True if the subtarget supports the Virtualization
+ /// extension.
+ bool HasVirtualization;
+
/// FPOnlySP - If true, the floating point unit only supports single
/// precision.
bool FPOnlySP;
@@ -159,11 +166,21 @@ protected:
/// HasTrustZone - if true, processor supports TrustZone security extensions
bool HasTrustZone;
+ /// HasCrypto - if true, processor supports Cryptography extensions
+ bool HasCrypto;
+
+ /// HasCRC - if true, processor supports CRC instructions
+ bool HasCRC;
+
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
bool AllowsUnalignedMem;
+ /// RestrictIT - If true, the subtarget disallows generation of deprecated IT
+ /// blocks to conform to ARMv8 rule.
+ bool RestrictIT;
+
/// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
/// and such) instructions in Thumb2 code.
bool Thumb2DSP;
@@ -228,6 +245,7 @@ public:
bool hasV5TOps() const { return HasV5TOps; }
bool hasV5TEOps() const { return HasV5TEOps; }
bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6MOps() const { return HasV6MOps; }
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
@@ -246,8 +264,11 @@ public:
bool hasVFP2() const { return HasVFPv2; }
bool hasVFP3() const { return HasVFPv3; }
bool hasVFP4() const { return HasVFPv4; }
- bool hasV8FP() const { return HasV8FP; }
+ bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
+ bool hasCrypto() const { return HasCrypto; }
+ bool hasCRC() const { return HasCRC; }
+ bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -255,6 +276,9 @@ public:
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool hasAnyDataBarrier() const {
+ return HasDataBarrier || (hasV6Ops() && !isThumb());
+ }
bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
@@ -275,10 +299,10 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
- bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
+ bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
- bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; }
- bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+ bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetELF() const { return !isTargetDarwin(); }
// ARM EABI is the bare-metal EABI described in ARM ABI documents and
// can be accessed via -target arm-none-eabi. This is NOT GNUEABI.
@@ -296,8 +320,9 @@ public:
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
bool isThumb2() const { return InThumbMode && HasThumb2; }
bool hasThumb2() const { return HasThumb2; }
- bool isMClass() const { return IsMClass; }
- bool isARClass() const { return !IsMClass; }
+ bool isMClass() const { return ARMProcClass == MClass; }
+ bool isRClass() const { return ARMProcClass == RClass; }
+ bool isAClass() const { return ARMProcClass == AClass; }
bool isR9Reserved() const { return IsR9Reserved; }
@@ -306,9 +331,15 @@ public:
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+ bool restrictIT() const { return RestrictIT; }
+
const std::string & getCPUString() const { return CPUString; }
unsigned getMispredictionPenalty() const;
+
+ /// This function returns true if the target has sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index e6dbcb6..be84bf6 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -196,8 +196,13 @@ bool ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only())
+ if (!getARMSubtarget().isThumb1Only()) {
+ // in v8, IfConversion depends on Thumb instruction widths
+ if (getARMSubtarget().restrictIT() &&
+ !getARMSubtarget().prefers32BitThumb())
+ addPass(createThumb2SizeReductionPass());
addPass(&IfConverterID);
+ }
}
if (getARMSubtarget().isThumb2())
addPass(createThumb2ITBlockPass());
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index dfdf6ab..7ec71b2 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -47,7 +47,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
MCStreamer &Streamer) const {
assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
- return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ return MCSymbolRefExpr::Create(getSymbol(*Mang, GV),
MCSymbolRefExpr::VK_ARM_TARGET2,
getContext());
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 34576ba..6bbb38f 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -129,6 +129,9 @@ public:
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
OperandValueKind Op1Info = OK_AnyValue,
OperandValueKind Op2Info = OK_AnyValue) const;
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const;
/// @}
};
@@ -182,7 +185,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
assert(ISD && "Invalid opcode");
// Single to/from double precision conversions.
- static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = {
// Vector fptrunc/fpext conversions.
{ ISD::FP_ROUND, MVT::v2f64, 2 },
{ ISD::FP_EXTEND, MVT::v2f32, 2 },
@@ -192,8 +195,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
- int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
- ISD, LT.second);
+ int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
if (Idx != -1)
return LT.first * NEONFltDblTbl[Idx].Cost;
}
@@ -207,7 +209,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
- static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
@@ -283,15 +286,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isVector() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
- array_lengthof(NEONVectorConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONVectorConversionTbl[Idx].Cost;
}
// Scalar float to integer conversions.
- static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
@@ -314,16 +317,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
- array_lengthof(NEONFloatConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONFloatConversionTbl[Idx].Cost;
}
// Scalar integer to float conversions.
- static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
@@ -347,16 +349,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isInteger() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
- array_lengthof(NEONIntegerConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONIntegerConversionTbl[Idx].Cost;
}
// Scalar integer conversion costs.
- static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
@@ -368,11 +369,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isInteger()) {
- int Idx =
- ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
- array_lengthof(ARMIntegerConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return ARMIntegerConversionTbl[Idx].Cost;
}
@@ -400,7 +398,8 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
// Lowering of some vector selects is currently far from perfect.
- static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
@@ -412,10 +411,9 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
EVT SelCondTy = TLI->getValueType(CondTy);
EVT SelValTy = TLI->getValueType(ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
- array_lengthof(NEONVectorSelectTbl),
- ISD, SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
if (Idx != -1)
return NEONVectorSelectTbl[Idx].Cost;
}
@@ -448,7 +446,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
if (Kind != SK_Reverse)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- static const CostTblEntry<MVT> NEONShuffleTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a double
// word (vrev) or two if we shuffle a quad word (vrev, vext).
{ ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
@@ -464,8 +462,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
- int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
- ISD::VECTOR_SHUFFLE, LT.second);
+ int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -480,7 +477,7 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
- static const CostTblEntry<MVT> CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
// Division.
// These costs are somewhat random. Choose a cost of 20 to indicate that
// vectorizing division (added function call) is going to be very expensive.
@@ -524,14 +521,37 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
int Idx = -1;
if (ST->hasNEON())
- Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode,
- LT.second);
+ Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);
if (Idx != -1)
return LT.first * CostTbl[Idx].Cost;
-
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
- Op2Info);
+ unsigned Cost =
+ TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+
+ // This is somewhat of a hack. The problem that we are facing is that SROA
+ // creates a sequence of shift, and, or instructions to construct values.
+ // These sequences are recognized by the ISel and have zero-cost. Not so for
+ // the vectorized code. Because we have support for v2i64 but not i64 those
+ // sequences look particularly beneficial to vectorize.
+ // To work around this we increase the cost of v2i64 operations to make them
+ // seem less beneficial.
+ if (LT.second == MVT::v2i64 &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue)
+ Cost += 4;
+
+ return Cost;
}
+unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+ if (Src->isVectorTy() && Alignment != 16 &&
+ Src->getVectorElementType()->isDoubleTy()) {
+ // Unaligned loads/stores are extremely inefficient.
+ // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
+ return LT.first * 4;
+ }
+ return LT.first;
+}
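To make the new ARM-specific cost hook concrete, a standalone restatement with a few worked data points; memoryOpCost here is a hypothetical helper mirroring the logic above, and the baseline of 1 assumes the type legalizes to a single part (LT.first == 1).

// Hypothetical mirror of ARMTTI::getMemoryOpCost (sketch, not the real API):
unsigned memoryOpCost(unsigned LegalizedParts, bool IsVector,
                      bool ElementIsDouble, unsigned Alignment) {
  if (IsVector && Alignment != 16 && ElementIsDouble)
    return LegalizedParts * 4; // vst1/vld1 path: ~4 uops vs. 1 for vstr/vldr
  return LegalizedParts;
}
// memoryOpCost(1, true, true, 8)   == 4   store <2 x double>, align 8
// memoryOpCost(1, true, true, 16)  == 1   store <2 x double>, align 16
// memoryOpCost(1, true, false, 8)  == 1   store <4 x float>,  align 8

The earlier v2i64 adjustment works the same way: a v2i64 operation whose second operand is a uniform constant simply gets 4 added to whatever the generic cost was.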
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 80e5c6e..e3f9e0d 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,6 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBuildAttrs.h"
+#include "ARMFPUName.h"
+#include "ARMFeatures.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -24,6 +27,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -47,8 +51,14 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
+ ARMTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = getParser().getStreamer().getTargetStreamer();
+ return static_cast<ARMTargetStreamer &>(TS);
+ }
+
// Unwind directives state
SMLoc FnStartLoc;
SMLoc CantUnwindLoc;
@@ -66,6 +76,8 @@ class ARMAsmParser : public MCTargetAsmParser {
// Map of register aliases registers via the .req directive.
StringMap<unsigned> RegisterReqs;
+ bool NextSymbolIsThumb;
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -127,6 +139,8 @@ class ARMAsmParser : public MCTargetAsmParser {
bool parseDirectiveUnreq(SMLoc L);
bool parseDirectiveArch(SMLoc L);
bool parseDirectiveEabiAttr(SMLoc L);
+ bool parseDirectiveCPU(SMLoc L);
+ bool parseDirectiveFPU(SMLoc L);
bool parseDirectiveFnStart(SMLoc L);
bool parseDirectiveFnEnd(SMLoc L);
bool parseDirectiveCantUnwind(SMLoc L);
@@ -139,7 +153,8 @@ class ARMAsmParser : public MCTargetAsmParser {
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
StringRef &ITMask);
- void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);
bool isThumb() const {
@@ -158,6 +173,9 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasV6Ops() const {
return STI.getFeatureBits() & ARM::HasV6Ops;
}
+ bool hasV6MOps() const {
+ return STI.getFeatureBits() & ARM::HasV6MOps;
+ }
bool hasV7Ops() const {
return STI.getFeatureBits() & ARM::HasV7Ops;
}
@@ -221,6 +239,9 @@ class ARMAsmParser : public MCTargetAsmParser {
// Asm Match Converter Methods
void cvtThumbMultiply(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtThumbBranches(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool processInstruction(MCInst &Inst,
@@ -229,8 +250,6 @@ class ARMAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool shouldOmitPredicateOperand(StringRef Mnemonic,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- bool isDeprecated(MCInst &Inst, StringRef &Info);
-
public:
enum ARMMatchResultTy {
Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
@@ -242,8 +261,9 @@ public:
};
- ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser), FPReg(-1) {
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), FPReg(-1) {
MCAsmParserExtension::Initialize(_Parser);
// Cache the MCRegisterInfo.
@@ -255,12 +275,7 @@ public:
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
- // Set ELF header flags.
- // FIXME: This should eventually end up somewhere else where more
- // intelligent flag decisions can be made. For now we are just maintaining
- // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
- if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
- MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+ NextSymbolIsThumb = false;
}
// Implementation of the MCTargetAsmParser interface:
@@ -277,6 +292,8 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm);
+ void onLabelParsed(MCSymbol *Symbol);
+
};
} // end anonymous namespace
@@ -601,7 +618,7 @@ public:
template<unsigned width, unsigned scale>
bool isUnsignedOffset() const {
if (!isImm()) return false;
- if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
int64_t Val = CE->getValue();
int64_t Align = 1LL << scale;
@@ -610,6 +627,22 @@ public:
}
return false;
}
+ // checks whether this operand is a signed offset which fits in a field
+ // of specified width and scaled by a specific number of bits
+ template<unsigned width, unsigned scale>
+ bool isSignedOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Align = 1LL << scale;
+ int64_t Max = Align * ((1LL << (width-1)) - 1);
+ int64_t Min = -Align * (1LL << (width-1));
+ return ((Val % Align) == 0) && (Val >= Min) && (Val <= Max);
+ }
+ return false;
+ }
+
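As a worked check of the range computed above, using the template arguments that the new Thumb branch handling relies on later in this patch; minOff, maxOff, and offAlign are illustrative helpers, not part of the parser.

constexpr int64_t offAlign(unsigned scale) { return 1LL << scale; }
constexpr int64_t maxOff(unsigned width, unsigned scale) {
  return offAlign(scale) * ((1LL << (width - 1)) - 1);
}
constexpr int64_t minOff(unsigned width, unsigned scale) {
  return -offAlign(scale) * (1LL << (width - 1));
}
// isSignedOffset<11, 1> (Thumb-1 b):   even offsets in [-2048, 2046]
static_assert(minOff(11, 1) == -2048 && maxOff(11, 1) == 2046, "tB range");
// isSignedOffset<8, 1>  (Thumb-1 bcc): even offsets in [-256, 254]
static_assert(minOff(8, 1) == -256 && maxOff(8, 1) == 254, "tBcc range");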
// checks whether this operand is a memory operand computed as an offset
// applied to PC. the offset may have 8 bits of magnitude and is represented
// with two bits of shift. textually it may be either [pc, #imm], #imm or
@@ -628,7 +661,7 @@ public:
Val = Memory.OffsetImm->getValue();
}
else return false;
- return ((Val % 4) == 0) && (Val >= -1020) && (Val <= 1020);
+ return ((Val % 4) == 0) && (Val >= 0) && (Val <= 1020);
}
bool isFPImm() const {
if (!isImm()) return false;
@@ -658,13 +691,6 @@ public:
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
- bool isImm0_4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 5;
- }
bool isImm0_1020s4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -687,6 +713,13 @@ public:
// explicitly exclude zero. we want that to use the normal 0_508 version.
return ((Value & 3) == 0) && Value > 0 && Value <= 508;
}
+ bool isImm0_239() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 240;
+ }
bool isImm0_255() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -848,6 +881,15 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 65536;
}
+ bool isImm256_65535Expr() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
+ int64_t Value = CE->getValue();
+ return Value >= 256 && Value < 65536;
+ }
bool isImm0_65535Expr() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -927,7 +969,8 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return ARM_AM::getT2SOImmVal(~Value) != -1;
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(~Value) != -1;
}
bool isT2SOImmNeg() const {
if (!isImm()) return false;
@@ -1773,8 +1816,6 @@ public:
void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
int32_t Imm = Memory.OffsetImm->getValue();
- // FIXME: Handle #-0
- if (Imm == INT32_MIN) Imm = 0;
Inst.addOperand(MCOperand::CreateImm(Imm));
}
@@ -2494,7 +2535,7 @@ void ARMOperand::print(raw_ostream &OS) const {
getImm()->print(OS);
break;
case k_MemBarrierOpt:
- OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt(), false) << ">";
break;
case k_InstSyncBarrierOpt:
OS << "<ARM_ISB::" << InstSyncBOptToString(getInstSyncBarrierOpt()) << ">";
@@ -2831,8 +2872,9 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
return -1;
switch (Name[2]) {
default: return -1;
- case '0': return 10;
- case '1': return 11;
+ // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON)
+ case '0': return CoprocOp == 'p'? -1: 10;
+ case '1': return CoprocOp == 'p'? -1: 11;
case '2': return 12;
case '3': return 13;
case '4': return 14;
@@ -3439,18 +3481,27 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
.Case("sy", ARM_MB::SY)
.Case("st", ARM_MB::ST)
+ .Case("ld", ARM_MB::LD)
.Case("sh", ARM_MB::ISH)
.Case("ish", ARM_MB::ISH)
.Case("shst", ARM_MB::ISHST)
.Case("ishst", ARM_MB::ISHST)
+ .Case("ishld", ARM_MB::ISHLD)
.Case("nsh", ARM_MB::NSH)
.Case("un", ARM_MB::NSH)
.Case("nshst", ARM_MB::NSHST)
+ .Case("nshld", ARM_MB::NSHLD)
.Case("unst", ARM_MB::NSHST)
.Case("osh", ARM_MB::OSH)
.Case("oshst", ARM_MB::OSHST)
+ .Case("oshld", ARM_MB::OSHLD)
.Default(~0U);
+ // ishld, oshld, nshld and ld are only available from ARMv8.
+ if (!hasV8Ops() && (Opt == ARM_MB::ISHLD || Opt == ARM_MB::OSHLD ||
+ Opt == ARM_MB::NSHLD || Opt == ARM_MB::LD))
+ Opt = ~0U;
+
if (Opt == ~0U)
return MatchOperand_NoMatch;
@@ -3498,7 +3549,7 @@ parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Tok.is(AsmToken::Identifier)) {
StringRef OptStr = Tok.getString();
- if (OptStr.lower() == "sy")
+ if (OptStr.equals_lower("sy"))
Opt = ARM_ISB::SY;
else
return MatchOperand_NoMatch;
@@ -4102,6 +4153,65 @@ cvtThumbMultiply(MCInst &Inst,
((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
}
+void ARMAsmParser::
+cvtThumbBranches(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ int CondOp = -1, ImmOp = -1;
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc: CondOp = 1; ImmOp = 2; break;
+
+ case ARM::t2B:
+ case ARM::t2Bcc: CondOp = 1; ImmOp = 3; break;
+
+ default: llvm_unreachable("Unexpected instruction in cvtThumbBranches");
+ }
+ // first decide whether or not the branch should be conditional
+ // by looking at its location relative to an IT block
+ if(inITBlock()) {
+ // inside an IT block we cannot have any conditional branches. Any
+ // such instruction needs to be converted to unconditional form
+ switch(Inst.getOpcode()) {
+ case ARM::tBcc: Inst.setOpcode(ARM::tB); break;
+ case ARM::t2Bcc: Inst.setOpcode(ARM::t2B); break;
+ }
+ } else {
+ // outside IT blocks we can only have unconditional branches with AL
+ // condition code or conditional branches with non-AL condition code
+ unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode();
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
+ break;
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::t2B : ARM::t2Bcc);
+ break;
+ }
+ }
+
+ // now decide on encoding size based on branch target range
+ switch(Inst.getOpcode()) {
+ // classify tB as either t2B or t1B based on range of immediate operand
+ case ARM::tB: {
+ ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
+ if(!op->isSignedOffset<11, 1>() && isThumbTwo())
+ Inst.setOpcode(ARM::t2B);
+ break;
+ }
+ // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand
+ case ARM::tBcc: {
+ ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
+ if(!op->isSignedOffset<8, 1>() && isThumbTwo())
+ Inst.setOpcode(ARM::t2Bcc);
+ break;
+ }
+ }
+ ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1);
+ ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2);
+}
+
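// NOTE (illustrative, not part of this change): a minimal standalone sketch of
// the range test that isSignedOffset<Bits, Scale>() is assumed to perform in
// cvtThumbBranches() above: the target offset must be a multiple of 1<<Scale
// and fit in a signed (Bits+Scale)-bit value.  The helper name and exact
// bounds are assumptions made only for illustration.
#include <cstdint>
static bool fitsSignedOffset(int64_t Offset, unsigned Bits, unsigned Scale) {
  int64_t Align = int64_t(1) << Scale;
  if (Offset % Align != 0)
    return false;                                          // must be aligned
  int64_t Min = -(int64_t(1) << (Bits + Scale - 1));       // e.g. -2048 for <11, 1>
  int64_t Max = (int64_t(1) << (Bits + Scale - 1)) - Align; // e.g. +2046 for <11, 1>
  return Offset >= Min && Offset <= Max;
}
// fitsSignedOffset(2046, 11, 1) -> true  : a 16-bit tB can reach the target.
// fitsSignedOffset(4096, 11, 1) -> false : cvtThumbBranches() widens to t2B.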
/// Parse an ARM memory expression, return false if successful else return true
/// or an error. The first token must be a '[' when called.
bool ARMAsmParser::
@@ -4601,7 +4711,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
- Mnemonic == "vaclt" || Mnemonic == "vacle" ||
+ Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "hlt" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
@@ -4688,8 +4798,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
//
// FIXME: It would be nice to autogen this.
void ARMAsmParser::
-getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
- bool &CanAcceptPredicationCode) {
+getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) {
if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
Mnemonic == "add" || Mnemonic == "adc" ||
@@ -4707,12 +4817,14 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
- Mnemonic == "trap" || Mnemonic == "setend" ||
+ Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") ||
Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") ||
Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
- Mnemonic == "vrintm") {
+ Mnemonic == "vrintm" || Mnemonic.startswith("aes") ||
+ Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
+ (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
@@ -4726,7 +4838,10 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
Mnemonic != "stc2" && Mnemonic != "stc2l" &&
!Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs");
} else if (isThumbOne()) {
- CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs";
+ if (hasV6MOps())
+ CanAcceptPredicationCode = Mnemonic != "movs";
+ else
+ CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs";
} else
CanAcceptPredicationCode = true;
}
@@ -4877,14 +4992,6 @@ bool ARMAsmParser::shouldOmitPredicateOperand(
return false;
}
-bool ARMAsmParser::isDeprecated(MCInst &Inst, StringRef &Info) {
- if (hasV8Ops() && Inst.getOpcode() == ARM::SETEND) {
- Info = "armv8";
- return true;
- }
- return false;
-}
-
static bool isDataTypeToken(StringRef Tok) {
return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
@@ -4980,7 +5087,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// the matcher deal with finding the right instruction or generating an
// appropriate error.
bool CanAcceptCarrySet, CanAcceptPredicationCode;
- getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
+ getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);
// If we had a carry-set on an instruction that can't do that, issue an
// error.
@@ -5115,8 +5222,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
if (!isThumb() && Operands.size() > 4 &&
- (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
- bool isLoad = (Mnemonic == "ldrexd");
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" ||
+ Mnemonic == "stlexd")) {
+ bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
unsigned Idx = isLoad ? 2 : 3;
ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
@@ -5200,45 +5308,44 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
return false;
}
-// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
-// the ARMInsts array) instead. Getting that here requires awkward
-// API changes, though. Better way?
-namespace llvm {
-extern const MCInstrDesc ARMInsts[];
-}
-static const MCInstrDesc &getInstDesc(unsigned Opcode) {
- return ARMInsts[Opcode];
+// Return true if the instruction has the interesting property of being
+// allowed in IT blocks, but not being predicable.
+static bool instIsBreakpoint(const MCInst &Inst) {
+ return Inst.getOpcode() == ARM::tBKPT ||
+ Inst.getOpcode() == ARM::BKPT ||
+ Inst.getOpcode() == ARM::tHLT ||
+ Inst.getOpcode() == ARM::HLT;
}
// FIXME: We would really like to be able to tablegen'erate this.
bool ARMAsmParser::
validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
SMLoc Loc = Operands[0]->getStartLoc();
+
// Check the IT block state first.
- // NOTE: BKPT instruction has the interesting property of being
- // allowed in IT blocks, but not being predicable. It just always
- // executes.
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT &&
- Inst.getOpcode() != ARM::BKPT) {
- unsigned bit = 1;
+ // NOTE: BKPT and HLT instructions have the interesting property of being
+ // allowed in IT blocks, but not being predicable. They just always execute.
+ if (inITBlock() && !instIsBreakpoint(Inst)) {
+ unsigned Bit = 1;
if (ITState.FirstCond)
ITState.FirstCond = false;
else
- bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+ Bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
// The instruction must be predicable.
if (!MCID.isPredicable())
return Error(Loc, "instructions in IT block must be predicable");
unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
- unsigned ITCond = bit ? ITState.Cond :
+ unsigned ITCond = Bit ? ITState.Cond :
ARMCC::getOppositeCondition(ITState.Cond);
if (Cond != ITCond) {
// Find the condition code Operand to get its SMLoc information.
SMLoc CondLoc;
- for (unsigned i = 1; i < Operands.size(); ++i)
- if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
- CondLoc = Operands[i]->getStartLoc();
+ for (unsigned I = 1; I < Operands.size(); ++I)
+ if (static_cast<ARMOperand*>(Operands[I])->isCondCode())
+ CondLoc = Operands[I]->getStartLoc();
return Error(CondLoc, "incorrect condition in IT block; got '" +
StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
"', but expected '" +
@@ -5247,20 +5354,55 @@ validateInstruction(MCInst &Inst,
// Check for non-'al' condition codes outside of the IT block.
} else if (isThumbTwo() && MCID.isPredicable() &&
Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
- ARMCC::AL && Inst.getOpcode() != ARM::tB &&
- Inst.getOpcode() != ARM::t2B)
+ ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
+ Inst.getOpcode() != ARM::t2Bcc)
return Error(Loc, "predicated instructions must be in IT block");
- switch (Inst.getOpcode()) {
+ const unsigned Opcode = Inst.getOpcode();
+ switch (Opcode) {
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST: {
+ const unsigned RtReg = Inst.getOperand(0).getReg();
+
+ // Rt can't be R14.
+ if (RtReg == ARM::LR)
+ return Error(Operands[3]->getStartLoc(),
+ "Rt can't be R14");
+
+ const unsigned Rt = MRI->getEncodingValue(RtReg);
+ // Rt must be even-numbered.
+ if ((Rt & 1) == 1)
+ return Error(Operands[3]->getStartLoc(),
+ "Rt must be even-numbered");
+
// Rt2 must be Rt + 1.
- unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
- unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ const unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"destination operands must be sequential");
+
+ if (Opcode == ARM::LDRD_PRE || Opcode == ARM::LDRD_POST) {
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(3).getReg());
+ // For addressing modes with writeback, the base register needs to be
+ // different from the destination registers.
+ if (Rn == Rt || Rn == Rt2)
+ return Error(Operands[3]->getStartLoc(),
+ "base register needs to be different from destination "
+ "registers");
+ }
+
+ return false;
+ }
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRD_PRE:
+ case ARM::t2LDRD_POST: {
+ // Rt2 must be different from Rt.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ if (Rt2 == Rt)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands can't be identical");
return false;
}
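  // NOTE (illustrative, not part of this change): the ARM-mode checks above
  // reject, for example:
  //   ldrd r1, r2, [r0]       @ "Rt must be even-numbered"
  //   ldrd lr, pc, [r0]       @ "Rt can't be R14"
  //   ldrd r2, r4, [r0]       @ "destination operands must be sequential"
  //   ldrd r2, r3, [r2], #8   @ writeback base overlaps a destination
  // while "ldrd r2, r3, [r0]" is accepted.  The Thumb2 forms handled in the
  // new t2LDRD* case only require the two destination registers to differ.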
case ARM::STRD: {
@@ -5284,50 +5426,77 @@ validateInstruction(MCInst &Inst,
}
case ARM::SBFX:
case ARM::UBFX: {
- // width must be in range [1, 32-lsb]
- unsigned lsb = Inst.getOperand(2).getImm();
- unsigned widthm1 = Inst.getOperand(3).getImm();
- if (widthm1 >= 32 - lsb)
+ // Width must be in range [1, 32-lsb].
+ unsigned LSB = Inst.getOperand(2).getImm();
+ unsigned Widthm1 = Inst.getOperand(3).getImm();
+ if (Widthm1 >= 32 - LSB)
return Error(Operands[5]->getStartLoc(),
"bitfield width must be in range [1,32-lsb]");
return false;
}
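  // NOTE (illustrative, not part of this change): with lsb = 8 the encoded
  // width-minus-one operand must stay below 32-lsb = 24, i.e. the bitfield
  // width may be at most 24 bits:
  //   ubfx r0, r1, #8, #24   @ accepted (widthm1 == 23)
  //   ubfx r0, r1, #8, #25   @ "bitfield width must be in range [1,32-lsb]"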
+ // Notionally handles ARM::tLDMIA_UPD too.
case ARM::tLDMIA: {
// If we're parsing Thumb2, the .w variant is available and handles
- // most cases that are normally illegal for a Thumb1 LDM
- // instruction. We'll make the transformation in processInstruction()
- // if necessary.
+ // most cases that are normally illegal for a Thumb1 LDM instruction.
+ // We'll make the transformation in processInstruction() if necessary.
//
// Thumb LDM instructions are writeback iff the base register is not
// in the register list.
unsigned Rn = Inst.getOperand(0).getReg();
- bool hasWritebackToken =
+ bool HasWritebackToken =
(static_cast<ARMOperand*>(Operands[3])->isToken() &&
static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo())
- return Error(Operands[3 + hasWritebackToken]->getStartLoc(),
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo())
+ return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
"registers must be in range r0-r7");
// If we should have writeback, then there should be a '!' token.
- if (!listContainsBase && !hasWritebackToken && !isThumbTwo())
+ if (!ListContainsBase && !HasWritebackToken && !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"writeback operator '!' expected");
// If we should not have writeback, there must not be a '!'. This is
// true even for the 32-bit wide encodings.
- if (listContainsBase && hasWritebackToken)
+ if (ListContainsBase && HasWritebackToken)
return Error(Operands[3]->getStartLoc(),
"writeback operator '!' not allowed when base register "
"in register list");
break;
}
- case ARM::t2LDMIA_UPD: {
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+    // ARM variants that load and update the same register are only officially
+    // UNPREDICTABLE from v7 upwards; what earlier architectures did is
+    // unclear.
+ if (!hasV7Ops())
+ break;
+ // Fallthrough
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
- return Error(Operands[4]->getStartLoc(),
- "writeback operator '!' not allowed when base register "
- "in register list");
+ return Error(Operands.back()->getStartLoc(),
+ "writeback register not allowed in register list");
break;
}
+ case ARM::sysLDMIA_UPD:
+ case ARM::sysLDMDA_UPD:
+ case ARM::sysLDMDB_UPD:
+ case ARM::sysLDMIB_UPD:
+ if (!listContainsReg(Inst, 3, ARM::PC))
+ return Error(Operands[4]->getStartLoc(),
+ "writeback register only allowed on system LDM "
+ "if PC in register-list");
+ break;
+ case ARM::sysSTMIA_UPD:
+ case ARM::sysSTMDA_UPD:
+ case ARM::sysSTMDB_UPD:
+ case ARM::sysSTMIB_UPD:
+ return Error(Operands[2]->getStartLoc(),
+ "system STM cannot have writeback register");
+ break;
case ARM::tMUL: {
// The second source operand must be the same register as the destination
// operand.
@@ -5351,26 +5520,35 @@ validateInstruction(MCInst &Inst,
// so only issue a diagnostic for thumb1. The instructions will be
// switched to the t2 encodings in processInstruction() if necessary.
case ARM::tPOP: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) &&
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::PC, ListContainsBase) &&
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or pc");
break;
}
case ARM::tPUSH: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) &&
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::LR, ListContainsBase) &&
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or lr");
break;
}
case ARM::tSTMIA_UPD: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo())
+ bool ListContainsBase, InvalidLowList;
+ InvalidLowList = checkLowRegisterList(Inst, 4, Inst.getOperand(0).getReg(),
+ 0, ListContainsBase);
+ if (InvalidLowList && !isThumbTwo())
return Error(Operands[4]->getStartLoc(),
"registers must be in range r0-r7");
+
+ // This would be converted to a 32-bit stm, but that's not valid if the
+ // writeback register is in the list.
+ if (InvalidLowList && ListContainsBase)
+ return Error(Operands[4]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
break;
}
case ARM::tADDrSP: {
@@ -5383,11 +5561,29 @@ validateInstruction(MCInst &Inst,
}
break;
}
+ // Final range checking for Thumb unconditional branch instructions.
+ case ARM::tB:
+ if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ case ARM::t2B: {
+ int op = (Operands[2]->isImm()) ? 2 : 3;
+ if (!(static_cast<ARMOperand*>(Operands[op]))->isSignedOffset<24, 1>())
+ return Error(Operands[op]->getStartLoc(), "branch target out of range");
+ break;
+ }
+ // Final range checking for Thumb conditional branch instructions.
+ case ARM::tBcc:
+ if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ case ARM::t2Bcc: {
+ int Op = (Operands[2]->isImm()) ? 2 : 3;
+ if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>())
+ return Error(Operands[Op]->getStartLoc(), "branch target out of range");
+ break;
+ }
}
-
- StringRef DepInfo;
- if (isDeprecated(Inst, DepInfo))
- Warning(Loc, "deprecated on " + DepInfo);
return false;
}
@@ -7425,7 +7621,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// 16-bit thumb arithmetic instructions either require or preclude the 'S'
// suffix depending on whether they're in an IT block or not.
unsigned Opc = Inst.getOpcode();
- const MCInstrDesc &MCID = getInstDesc(Opc);
+ const MCInstrDesc &MCID = MII.get(Opc);
if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
assert(MCID.hasOptionalDef() &&
"optionally flag setting instruction missing optional def operand");
@@ -7486,12 +7682,22 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
- // Some instructions need post-processing to, for example, tweak which
- // encoding is selected. Loop on it while changes happen so the
- // individual transformations can chain off each other. E.g.,
- // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
- while (processInstruction(Inst, Operands))
- ;
+  { // processInstruction() updates the inITBlock state, so we save it first
+ bool wasInITBlock = inITBlock();
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
+ while (processInstruction(Inst, Operands))
+ ;
+
+    // Only after the instruction is fully processed can we validate it
+ if (wasInITBlock && hasV8Ops() && isThumb() &&
+ !isV8EligibleForIT(&Inst, 2)) {
+ Warning(IDLoc, "deprecated instruction in IT block");
+ }
+ }
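  // NOTE (illustrative, not part of this change): on ARMv8, IT blocks are
  // only fully supported for a restricted set of 16-bit instructions, so once
  // processInstruction() has settled on the final encoding a sequence such as
  //   it eq
  //   addeq.w r0, r0, r1
  // is expected to draw "deprecated instruction in IT block" when assembling
  // for a v8 target; the exact eligibility rules live in isV8EligibleForIT().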
// Only move forward at the very end so that everything in validate
// and process gets a consistent answer about whether we're in an IT
@@ -7544,15 +7750,15 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
- case Match_ImmRange0_4: {
+ case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
- return Error(ErrorLoc, "immediate operand must be in the range [0,4]");
+ return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
}
- case Match_ImmRange0_15: {
+ case Match_ImmRange0_239: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
- return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+ return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
}
}
@@ -7580,6 +7786,10 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveArch(DirectiveID.getLoc());
else if (IDVal == ".eabi_attribute")
return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ else if (IDVal == ".cpu")
+ return parseDirectiveCPU(DirectiveID.getLoc());
+ else if (IDVal == ".fpu")
+ return parseDirectiveFPU(DirectiveID.getLoc());
else if (IDVal == ".fnstart")
return parseDirectiveFnStart(DirectiveID.getLoc());
else if (IDVal == ".fnend")
@@ -7658,13 +7868,18 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
return false;
}
+void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+ if (NextSymbolIsThumb) {
+ getParser().getStreamer().EmitThumbFunc(Symbol);
+ NextSymbolIsThumb = false;
+ }
+}
+
/// parseDirectiveThumbFunc
/// ::= .thumbfunc symbol_name
bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
bool isMachO = MAI->hasSubsectionsViaSymbols();
- StringRef Name;
- bool needFuncName = true;
  // Darwin asm optionally allows a function name after the .thumb_func
  // directive; ELF asm does not.
@@ -7673,29 +7888,19 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
if (Tok.isNot(AsmToken::EndOfStatement)) {
if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
return Error(L, "unexpected token in .thumb_func directive");
- Name = Tok.getIdentifier();
+ MCSymbol *Func =
+ getParser().getContext().GetOrCreateSymbol(Tok.getIdentifier());
+ getParser().getStreamer().EmitThumbFunc(Func);
Parser.Lex(); // Consume the identifier token.
- needFuncName = false;
+ return false;
}
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
- // Eat the end of statement and any blank lines that follow.
- while (getLexer().is(AsmToken::EndOfStatement))
- Parser.Lex();
+ NextSymbolIsThumb = true;
- // FIXME: assuming function name will be the line following .thumb_func
- // We really should be checking the next symbol definition even if there's
- // stuff in between.
- if (needFuncName) {
- Name = Parser.getTok().getIdentifier();
- }
-
- // Mark symbol as a thumb symbol.
- MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
- getParser().getStreamer().EmitThumbFunc(Func);
return false;
}
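// NOTE (illustrative, not part of this change): with this rework the ELF/GAS
// form of the directive defers the marking to onLabelParsed(), so
//       .thumb_func
//     foo:            @ foo is marked as a Thumb function here
// behaves like the Darwin form ".thumb_func _foo", which names the symbol on
// the directive itself.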
@@ -7807,7 +8012,48 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
/// parseDirectiveEabiAttr
/// ::= .eabi_attribute int, int
bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
- return true;
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ return Error(L, "integer expected");
+ int64_t Tag = Parser.getTok().getIntVal();
+ Parser.Lex(); // eat tag integer
+
+ if (Parser.getTok().isNot(AsmToken::Comma))
+ return Error(L, "comma expected");
+ Parser.Lex(); // skip comma
+
+ L = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ return Error(L, "integer expected");
+ int64_t Value = Parser.getTok().getIntVal();
+ Parser.Lex(); // eat value integer
+
+ getTargetStreamer().emitAttribute(Tag, Value);
+ return false;
+}
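// NOTE (illustrative, not part of this change): this handler only accepts the
// two-integer form, e.g.
//   .eabi_attribute 6, 10    @ Tag_CPU_arch = v7 on the usual AEABI numbering
// (tag/value numbers given purely for illustration).  String-valued
// attributes such as the CPU name are covered by the separate .cpu directive
// below.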
+
+/// parseDirectiveCPU
+/// ::= .cpu str
+bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
+ StringRef CPU = getParser().parseStringToEndOfStatement().trim();
+ getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
+ return false;
+}
+
+/// parseDirectiveFPU
+/// ::= .fpu str
+bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
+ StringRef FPU = getParser().parseStringToEndOfStatement().trim();
+
+ unsigned ID = StringSwitch<unsigned>(FPU)
+#define ARM_FPU_NAME(NAME, ID) .Case(NAME, ARM::ID)
+#include "ARMFPUName.def"
+ .Default(ARM::INVALID_FPU);
+
+ if (ID == ARM::INVALID_FPU)
+ return Error(L, "Unknown FPU name");
+
+ getTargetStreamer().emitFPU(ID);
+ return false;
}
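// NOTE (illustrative, not part of this change): the accepted names come from
// ARMFPUName.def, so something like
//   .fpu neon-vfpv4
// selects the corresponding ARM::* FPU id (assuming that entry exists in the
// .def file), while an unrecognised string is reported as "Unknown FPU name".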
/// parseDirectiveFnStart
@@ -7820,7 +8066,7 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
}
FnStartLoc = L;
- getParser().getStreamer().EmitFnStart();
+ getTargetStreamer().emitFnStart();
return false;
}
@@ -7833,8 +8079,7 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
// Reset the unwind directives parser state
resetUnwindDirectiveParserState();
-
- getParser().getStreamer().EmitFnEnd();
+ getTargetStreamer().emitFnEnd();
return false;
}
@@ -7856,7 +8101,7 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
return true;
}
- getParser().getStreamer().EmitCantUnwind();
+ getTargetStreamer().emitCantUnwind();
return false;
}
@@ -7887,7 +8132,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
Parser.Lex();
MCSymbol *PR = getParser().getContext().GetOrCreateSymbol(Name);
- getParser().getStreamer().EmitPersonality(PR);
+ getTargetStreamer().emitPersonality(PR);
return false;
}
@@ -7904,7 +8149,7 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
return true;
}
- getParser().getStreamer().EmitHandlerData();
+ getTargetStreamer().emitHandlerData();
return false;
}
@@ -7964,9 +8209,8 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
Offset = CE->getValue();
}
- getParser().getStreamer().EmitSetFP(static_cast<unsigned>(NewFPReg),
- static_cast<unsigned>(NewSPReg),
- Offset);
+ getTargetStreamer().emitSetFP(static_cast<unsigned>(NewFPReg),
+ static_cast<unsigned>(NewSPReg), Offset);
return false;
}
@@ -7995,7 +8239,7 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) {
if (!CE)
return Error(ExLoc, "pad offset must be an immediate");
- getParser().getStreamer().EmitPad(CE->getValue());
+ getTargetStreamer().emitPad(CE->getValue());
return false;
}
@@ -8027,7 +8271,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
if (IsVector && !Op->isDPRRegList())
return Error(L, ".vsave expects DPR registers");
- getParser().getStreamer().EmitRegSave(Op->getRegList(), IsVector);
+ getTargetStreamer().emitRegSave(Op->getRegList(), IsVector);
return false;
}
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 8a06664..9c7988f 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -323,8 +323,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
@@ -507,6 +505,14 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
+ result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -823,16 +829,28 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Check(result, AddThumbPredicate(MI));
return result;
}
- }
- MI.clear();
- uint32_t NEONv8Insn = insn32;
- NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
- result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
- Size = 4;
- return result;
+ MI.clear();
+ uint32_t NEONCryptoInsn = insn32;
+ NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25
+ result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
+ Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ uint32_t NEONv8Insn = insn32;
+ NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
+ result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
}
MI.clear();
@@ -1185,20 +1203,22 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- bool writebackLoad = false;
- unsigned writebackReg = 0;
+ bool NeedDisjointWriteback = false;
+ unsigned WritebackReg = 0;
switch (Inst.getOpcode()) {
- default:
- break;
- case ARM::LDMIA_UPD:
- case ARM::LDMDB_UPD:
- case ARM::LDMIB_UPD:
- case ARM::LDMDA_UPD:
- case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB_UPD:
- writebackLoad = true;
- writebackReg = Inst.getOperand(0).getReg();
- break;
+ default:
+ break;
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ NeedDisjointWriteback = true;
+ WritebackReg = Inst.getOperand(0).getReg();
+ break;
}
// Empty register lists are not allowed.
@@ -1208,7 +1228,7 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
return MCDisassembler::Fail;
// Writeback not allowed if Rn is in the target list.
- if (writebackLoad && writebackReg == Inst.end()[-1].getReg())
+ if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
Check(S, MCDisassembler::SoftFail);
}
}
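// NOTE (illustrative, not part of this change): an encoding equivalent to
// "ldmia r1!, {r1, r2}" still decodes but is flagged SoftFail because the
// writeback base also appears in the register list; with the added
// t2STM*_UPD cases the same now holds for store forms such as
// "stmia.w r1!, {r1, r2}".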
@@ -1343,6 +1363,11 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
break;
}
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ if ((featureBits & ARM::HasV8Ops) && (coproc != 14))
+ return MCDisassembler::Fail;
+
Inst.addOperand(MCOperand::CreateImm(coproc));
Inst.addOperand(MCOperand::CreateImm(CRd));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3794,6 +3819,11 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ if ((featureBits & ARM::HasV8Ops) && !(Val == 14 || Val == 15))
+ return MCDisassembler::Fail;
+
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
}
@@ -4901,15 +4931,6 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder)
-{
- unsigned Imm = fieldFromInstruction(Insn, 0, 3);
- if (Imm > 4) return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(Imm));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 97da232..f897028 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -76,14 +76,23 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot) {
unsigned Opcode = MI->getOpcode();
+ switch(Opcode) {
+
// Check for HINT instructions w/ canonical names.
- if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) {
+ case ARM::HINT:
+ case ARM::tHINT:
+ case ARM::t2HINT:
switch (MI->getOperand(0).getImm()) {
case 0: O << "\tnop"; break;
case 1: O << "\tyield"; break;
case 2: O << "\twfe"; break;
case 3: O << "\twfi"; break;
case 4: O << "\tsev"; break;
+ case 5:
+ if ((getAvailableFeatures() & ARM::HasV8Ops)) {
+ O << "\tsevl";
+ break;
+ } // Fallthrough for non-v8
default:
// Anything else should just print normally.
printInstruction(MI, O);
@@ -95,10 +104,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << ".w";
printAnnotation(O, Annot);
return;
- }
// Check for MOVs and print canonical forms, instead.
- if (Opcode == ARM::MOVsr) {
+ case ARM::MOVsr: {
// FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
@@ -121,7 +129,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (Opcode == ARM::MOVsi) {
+ case ARM::MOVsi: {
// FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
@@ -149,81 +157,91 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
-
// A8.6.123 PUSH
- if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP &&
- MI->getNumOperands() > 5) {
- // Should only print PUSH if there are at least two registers in the list.
- O << '\t' << "push";
- printPredicateOperand(MI, 2, O);
- if (Opcode == ARM::t2STMDB_UPD)
- O << ".w";
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
- if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP &&
- MI->getOperand(3).getImm() == -4) {
- O << '\t' << "push";
- printPredicateOperand(MI, 4, O);
- O << "\t{";
- printRegName(O, MI->getOperand(1).getReg());
- O << "}";
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+ // Should only print PUSH if there are at least two registers in the list.
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::STR_PRE_IMM:
+ if (MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(3).getImm() == -4) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 4, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(1).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.122 POP
- if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP &&
- MI->getNumOperands() > 5) {
- // Should only print POP if there are at least two registers in the list.
- O << '\t' << "pop";
- printPredicateOperand(MI, 2, O);
- if (Opcode == ARM::t2LDMIA_UPD)
- O << ".w";
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
- if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP &&
- MI->getOperand(4).getImm() == 4) {
- O << '\t' << "pop";
- printPredicateOperand(MI, 5, O);
- O << "\t{";
- printRegName(O, MI->getOperand(0).getReg());
- O << "}";
- printAnnotation(O, Annot);
- return;
- }
-
+ case ARM::LDMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+ // Should only print POP if there are at least two registers in the list.
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::LDR_POST_IMM:
+ if (MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(4).getImm() == 4) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 5, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(0).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.355 VPUSH
- if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
- O << '\t' << "vpush";
- printPredicateOperand(MI, 2, O);
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::VSTMSDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.354 VPOP
- if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
- O << '\t' << "vpop";
- printPredicateOperand(MI, 2, O);
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMDIA_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
- if (Opcode == ARM::tLDMIA) {
+ case ARM::tLDMIA: {
bool Writeback = true;
unsigned BaseReg = MI->getOperand(0).getReg();
for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
@@ -249,9 +267,10 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// GPRs. However, when decoding them, the two GRPs cannot be automatically
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
- if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+ case ARM::LDREXD: case ARM::STREXD:
+ case ARM::LDAEXD: case ARM::STLEXD:
const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
- bool isStore = Opcode == ARM::STREXD;
+ bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
if (MRC.contains(Reg)) {
MCInst NewMI;
@@ -676,7 +695,7 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
- O << ARM_MB::MemBOptToString(val);
+ O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops));
}
void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index b1e25d8..5615b80 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -25,9 +25,9 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -640,16 +640,16 @@ public:
// FIXME: This should be in a separate file.
class DarwinARMAsmBackend : public ARMAsmBackend {
public:
- const object::mach::CPUSubtypeARM Subtype;
+ const MachO::CPUSubTypeARM Subtype;
DarwinARMAsmBackend(const Target &T, const StringRef TT,
- object::mach::CPUSubtypeARM st)
+ MachO::CPUSubTypeARM st)
: ARMAsmBackend(T, TT), Subtype(st) {
HasDataInCodeSupport = true;
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_ARM,
+ MachO::CPU_TYPE_ARM,
Subtype);
}
@@ -660,22 +660,24 @@ public:
} // end anonymous namespace
-MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
- object::mach::CPUSubtypeARM CS =
- StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName())
- .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T)
- .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ)
- .Cases("armv6", "thumbv6", object::mach::CSARM_V6)
- .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M)
- .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM)
- .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F)
- .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K)
- .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M)
- .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S)
- .Default(object::mach::CSARM_V7);
+ MachO::CPUSubTypeARM CS =
+ StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
+ .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
+ .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
+ .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
+ .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
+ .Cases("armv7f", "thumbv7f", MachO::CPU_SUBTYPE_ARM_V7F)
+ .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
+ .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
+ .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
+ .Default(MachO::CPU_SUBTYPE_ARM_V7);
return new DarwinARMAsmBackend(T, TT, CS);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ff9917d..af939fc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -121,41 +121,41 @@ namespace ARM_MB {
// the option field for memory barrier operations.
enum MemBOpt {
RESERVED_0 = 0,
- RESERVED_1 = 1,
+ OSHLD = 1,
OSHST = 2,
OSH = 3,
RESERVED_4 = 4,
- RESERVED_5 = 5,
+ NSHLD = 5,
NSHST = 6,
NSH = 7,
RESERVED_8 = 8,
- RESERVED_9 = 9,
+ ISHLD = 9,
ISHST = 10,
ISH = 11,
RESERVED_12 = 12,
- RESERVED_13 = 13,
+ LD = 13,
ST = 14,
SY = 15
};
- inline static const char *MemBOptToString(unsigned val) {
+ inline static const char *MemBOptToString(unsigned val, bool HasV8) {
switch (val) {
default: llvm_unreachable("Unknown memory operation");
case SY: return "sy";
case ST: return "st";
- case RESERVED_13: return "#0xd";
+ case LD: return HasV8 ? "ld" : "#0xd";
case RESERVED_12: return "#0xc";
case ISH: return "ish";
case ISHST: return "ishst";
- case RESERVED_9: return "#0x9";
+ case ISHLD: return HasV8 ? "ishld" : "#0x9";
case RESERVED_8: return "#0x8";
case NSH: return "nsh";
case NSHST: return "nshst";
- case RESERVED_5: return "#0x5";
+ case NSHLD: return HasV8 ? "nshld" : "#0x5";
case RESERVED_4: return "#0x4";
case OSH: return "osh";
case OSHST: return "oshst";
- case RESERVED_1: return "#0x1";
+ case OSHLD: return HasV8 ? "oshld" : "#0x1";
case RESERVED_0: return "#0x0";
}
}
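// NOTE (illustrative, not part of this change): callers now pass the v8 flag
// through, e.g.
//   MemBOptToString(ARM_MB::ISHLD, /*HasV8=*/true)  -> "ishld"
//   MemBOptToString(ARM_MB::ISHLD, /*HasV8=*/false) -> "#0x9"
// because encoding 9 is a reserved barrier option before ARMv8.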
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6b98205..471897d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,6 +13,8 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBuildAttrs.h"
+#include "ARMFPUName.h"
#include "ARMRegisterInfo.h"
#include "ARMUnwindOp.h"
#include "ARMUnwindOpAsm.h"
@@ -27,6 +29,7 @@
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
@@ -36,7 +39,9 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
using namespace llvm;
@@ -45,8 +50,218 @@ static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
}
+static const char *GetFPUName(unsigned ID) {
+ switch (ID) {
+ default:
+ llvm_unreachable("Unknown FPU kind");
+ break;
+#define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME;
+#include "ARMFPUName.def"
+ }
+ return NULL;
+}
+
namespace {
+class ARMELFStreamer;
+
+class ARMTargetAsmStreamer : public ARMTargetStreamer {
+ formatted_raw_ostream &OS;
+ MCInstPrinter &InstPrinter;
+
+ virtual void emitFnStart();
+ virtual void emitFnEnd();
+ virtual void emitCantUnwind();
+ virtual void emitPersonality(const MCSymbol *Personality);
+ virtual void emitHandlerData();
+ virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void emitPad(int64_t Offset);
+ virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void switchVendor(StringRef Vendor);
+ virtual void emitAttribute(unsigned Attribute, unsigned Value);
+ virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+ virtual void emitFPU(unsigned FPU);
+ virtual void finishAttributeSection();
+
+public:
+ ARMTargetAsmStreamer(formatted_raw_ostream &OS, MCInstPrinter &InstPrinter);
+};
+
+ARMTargetAsmStreamer::ARMTargetAsmStreamer(formatted_raw_ostream &OS,
+ MCInstPrinter &InstPrinter)
+ : OS(OS), InstPrinter(InstPrinter) {}
+void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; }
+void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; }
+void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; }
+void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) {
+ OS << "\t.personality " << Personality->getName() << '\n';
+}
+void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; }
+void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {
+ OS << "\t.setfp\t";
+ InstPrinter.printRegName(OS, FpReg);
+ OS << ", ";
+ InstPrinter.printRegName(OS, SpReg);
+ if (Offset)
+ OS << ", #" << Offset;
+ OS << '\n';
+}
+void ARMTargetAsmStreamer::emitPad(int64_t Offset) {
+ OS << "\t.pad\t#" << Offset << '\n';
+}
+void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ assert(RegList.size() && "RegList should not be empty");
+ if (isVector)
+ OS << "\t.vsave\t{";
+ else
+ OS << "\t.save\t{";
+
+ InstPrinter.printRegName(OS, RegList[0]);
+
+ for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
+ OS << ", ";
+ InstPrinter.printRegName(OS, RegList[i]);
+ }
+
+ OS << "}\n";
+}
+void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {
+}
+void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value) << "\n";
+}
+void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {
+ switch (Attribute) {
+ default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
+ case ARMBuildAttrs::CPU_name:
+ OS << "\t.cpu\t" << String.lower() << "\n";
+ break;
+ }
+}
+void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
+ OS << "\t.fpu\t" << GetFPUName(FPU) << "\n";
+}
+void ARMTargetAsmStreamer::finishAttributeSection() {
+}
+
+class ARMTargetELFStreamer : public ARMTargetStreamer {
+private:
+ // This structure holds all attributes, accounting for
+  // their string/numeric value, so we can later emit them
+  // in declaration order, keeping them all in the same vector.
+ struct AttributeItem {
+ enum {
+ HiddenAttribute = 0,
+ NumericAttribute,
+ TextAttribute
+ } Type;
+ unsigned Tag;
+ unsigned IntValue;
+ StringRef StringValue;
+
+ static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
+ return (LHS.Tag < RHS.Tag);
+ }
+ };
+
+ StringRef CurrentVendor;
+ unsigned FPU;
+ SmallVector<AttributeItem, 64> Contents;
+
+ const MCSection *AttributeSection;
+
+ // FIXME: this should be in a more generic place, but
+ // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
+ static size_t getULEBSize(int Value) {
+ size_t Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t); // Is this really necessary?
+ } while (Value);
+ return Size;
+ }
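// NOTE (illustrative, not part of this change): getULEBSize() above counts
// the 7-bit groups a ULEB128 encoding would need.  A minimal standalone
// encoder, written only to make the worked example below concrete:
#include <cstdint>
#include <vector>
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Bytes;
  do {
    uint8_t Byte = Value & 0x7f;  // low 7 bits of the remaining value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;               // continuation bit: more groups follow
    Bytes.push_back(Byte);
  } while (Value != 0);
  return Bytes;
}
// encodeULEB128(300) == {0xac, 0x02}, i.e. two bytes - exactly what
// getULEBSize(300) reports when sizing the attribute contents.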
+
+ AttributeItem *getAttributeItem(unsigned Attribute) {
+ for (size_t i = 0; i < Contents.size(); ++i)
+ if (Contents[i].Tag == Attribute)
+ return &Contents[i];
+ return 0;
+ }
+
+ void setAttributeItem(unsigned Attribute, unsigned Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->IntValue = Value;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::NumericAttribute,
+ Attribute,
+ Value,
+ StringRef("")
+ };
+ Contents.push_back(Item);
+ }
+
+ void setAttributeItem(unsigned Attribute, StringRef Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->StringValue = Value;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::TextAttribute,
+ Attribute,
+ 0,
+ Value
+ };
+ Contents.push_back(Item);
+ }
+
+ void emitFPUDefaultAttributes();
+
+ ARMELFStreamer &getStreamer();
+
+ virtual void emitFnStart();
+ virtual void emitFnEnd();
+ virtual void emitCantUnwind();
+ virtual void emitPersonality(const MCSymbol *Personality);
+ virtual void emitHandlerData();
+ virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void emitPad(int64_t Offset);
+ virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void switchVendor(StringRef Vendor);
+ virtual void emitAttribute(unsigned Attribute, unsigned Value);
+ virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+ virtual void emitFPU(unsigned FPU);
+ virtual void finishAttributeSection();
+
+ size_t calculateContentSize() const;
+
+public:
+ ARMTargetELFStreamer()
+ : ARMTargetStreamer(), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU),
+ AttributeSection(0) {
+ }
+};
+
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
@@ -61,27 +276,29 @@ namespace {
/// by MachO. Beware!
class ARMELFStreamer : public MCELFStreamer {
public:
- ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, bool IsThumb)
- : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
+ friend class ARMTargetELFStreamer;
+
+ ARMELFStreamer(MCContext &Context, MCTargetStreamer *TargetStreamer,
+ MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool IsThumb)
+ : MCELFStreamer(Context, TargetStreamer, TAB, OS, Emitter),
IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
Reset();
}
~ARMELFStreamer() {}
+ virtual void FinishImpl();
+
// ARM exception handling directives
- virtual void EmitFnStart();
- virtual void EmitFnEnd();
- virtual void EmitCantUnwind();
- virtual void EmitPersonality(const MCSymbol *Per);
- virtual void EmitHandlerData();
- virtual void EmitSetFP(unsigned NewFpReg,
- unsigned NewSpReg,
- int64_t Offset = 0);
- virtual void EmitPad(int64_t Offset);
- virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
- bool isVector);
+ void emitFnStart();
+ void emitFnEnd();
+ void emitCantUnwind();
+ void emitPersonality(const MCSymbol *Per);
+ void emitHandlerData();
+ void emitSetFP(unsigned NewFpReg, unsigned NewSpReg, int64_t Offset = 0);
+ void emitPad(int64_t Offset);
+ void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector);
virtual void ChangeSection(const MCSection *Section,
const MCExpr *Subsection) {
@@ -141,10 +358,6 @@ public:
}
}
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_ARMELFStreamer;
- }
-
private:
enum ElfMappingSymbol {
EMS_None,
@@ -183,7 +396,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
@@ -232,6 +445,224 @@ private:
};
} // end anonymous namespace
+ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
+ ARMELFStreamer *S = static_cast<ARMELFStreamer *>(Streamer);
+ return *S;
+}
+
+void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); }
+void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); }
+void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); }
+void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) {
+ getStreamer().emitPersonality(Personality);
+}
+void ARMTargetELFStreamer::emitHandlerData() {
+ getStreamer().emitHandlerData();
+}
+void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {
+ getStreamer().emitSetFP(FpReg, SpReg, Offset);
+}
+void ARMTargetELFStreamer::emitPad(int64_t Offset) {
+ getStreamer().emitPad(Offset);
+}
+void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ getStreamer().emitRegSave(RegList, isVector);
+}
+void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor == Vendor)
+ return;
+
+ if (!CurrentVendor.empty())
+ finishAttributeSection();
+
+ assert(Contents.empty() &&
+ ".ARM.attributes should be flushed before changing vendor");
+ CurrentVendor = Vendor;
+
+}
+void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef Value) {
+ setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitFPU(unsigned Value) {
+ FPU = Value;
+}
+void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
+ switch (FPU) {
+ case ARM::VFP:
+ case ARM::VFPV2:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV3:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV3_D16:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV4:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV4_D16:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FP_ARMV8:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPARMv8A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON_VFPV4:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon2,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON_FP_ARMV8:
+ case ARM::CRYPTO_NEON_FP_ARMV8:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPARMv8A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeonARMv8,
+ /* OverwriteExisting= */ false);
+ break;
+
+ default:
+ report_fatal_error("Unknown FPU: " + Twine(FPU));
+ break;
+ }
+}
+size_t ARMTargetELFStreamer::calculateContentSize() const {
+ size_t Result = 0;
+ for (size_t i = 0; i < Contents.size(); ++i) {
+ AttributeItem item = Contents[i];
+ switch (item.Type) {
+ case AttributeItem::HiddenAttribute:
+ break;
+ case AttributeItem::NumericAttribute:
+ Result += getULEBSize(item.Tag);
+ Result += getULEBSize(item.IntValue);
+ break;
+ case AttributeItem::TextAttribute:
+ Result += getULEBSize(item.Tag);
+ Result += item.StringValue.size() + 1; // string + '\0'
+ break;
+ }
+ }
+ return Result;
+}
+void ARMTargetELFStreamer::finishAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
+
+ if (FPU != ARM::INVALID_FPU)
+ emitFPUDefaultAttributes();
+
+ if (Contents.empty())
+ return;
+
+ std::sort(Contents.begin(), Contents.end(), AttributeItem::LessTag);
+
+ ARMELFStreamer &Streamer = getStreamer();
+
+ // Switch to .ARM.attributes section
+ if (AttributeSection) {
+ Streamer.SwitchSection(AttributeSection);
+ } else {
+ AttributeSection =
+ Streamer.getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
+ Streamer.SwitchSection(AttributeSection);
+
+ // Format version
+ Streamer.EmitIntValue(0x41, 1);
+ }
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ const size_t ContentsSize = calculateContentSize();
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ // Size should have been accounted for already, now
+ // emit each field as its type (ULEB or String)
+ for (size_t i = 0; i < Contents.size(); ++i) {
+ AttributeItem item = Contents[i];
+ Streamer.EmitULEB128IntValue(item.Tag);
+ switch (item.Type) {
+ default: llvm_unreachable("Invalid attribute type");
+ case AttributeItem::NumericAttribute:
+ Streamer.EmitULEB128IntValue(item.IntValue);
+ break;
+ case AttributeItem::TextAttribute:
+ Streamer.EmitBytes(item.StringValue.upper());
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ }
+ }
+
+ Contents.clear();
+ FPU = ARM::INVALID_FPU;
+}
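// NOTE (illustrative, not part of this change): a worked example of the
// layout emitted above, assuming a little-endian target, vendor "aeabi" and a
// single numeric attribute - say tag 6 with value 10 (Tag_CPU_arch = v7 on
// the usual AEABI numbering, used here purely for illustration):
//   0x41                      format version 'A' (first use of the section)
//   0x11 0x00 0x00 0x00       section length 17 = 4 + 6 ("aeabi"+NUL) + 5 + 2
//   'a' 'e' 'a' 'b' 'i' 0x00  vendor name
//   0x01                      ARMBuildAttrs::File
//   0x07 0x00 0x00 0x00       tag size 7 = 1 + 4 + 2 bytes of contents
//   0x06 0x0a                 ULEB128-encoded tag and value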
+
+void ARMELFStreamer::FinishImpl() {
+ MCTargetStreamer &TS = getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ ATS.finishAttributeSection();
+
+ MCELFStreamer::FinishImpl();
+}
+
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
unsigned Flags,
@@ -295,29 +726,13 @@ void ARMELFStreamer::Reset() {
UnwindOpAsm.Reset();
}
-// Add the R_ARM_NONE fixup at the same position
-void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
- const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
-
- const MCSymbolRefExpr *PersonalityRef =
- MCSymbolRefExpr::Create(PersonalitySym,
- MCSymbolRefExpr::VK_ARM_NONE,
- getContext());
-
- AddValueSymbols(PersonalityRef);
- MCDataFragment *DF = getOrCreateDataFragment();
- DF->getFixups().push_back(
- MCFixup::Create(DF->getContents().size(), PersonalityRef,
- MCFixup::getKindForSize(4, false)));
-}
-
-void ARMELFStreamer::EmitFnStart() {
+void ARMELFStreamer::emitFnStart() {
assert(FnStart == 0);
FnStart = getContext().CreateTempSymbol();
EmitLabel(FnStart);
}
-void ARMELFStreamer::EmitFnEnd() {
+void ARMELFStreamer::emitFnEnd() {
  assert(FnStart && ".fnstart must precede .fnend");
// Emit unwind opcodes if there is no .handlerdata directive
@@ -365,8 +780,20 @@ void ARMELFStreamer::EmitFnEnd() {
Reset();
}
-void ARMELFStreamer::EmitCantUnwind() {
- CantUnwind = true;
+void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
+
+// Add the R_ARM_NONE fixup at the same position
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
+ PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
+
+ AddValueSymbols(PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
}
void ARMELFStreamer::FlushPendingOffset() {
@@ -429,17 +856,14 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
EmitIntValue(0, 4);
}
-void ARMELFStreamer::EmitHandlerData() {
- FlushUnwindOpcodes(false);
-}
+void ARMELFStreamer::emitHandlerData() { FlushUnwindOpcodes(false); }
-void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
+void ARMELFStreamer::emitPersonality(const MCSymbol *Per) {
Personality = Per;
UnwindOpAsm.setPersonality(Per);
}
-void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
- unsigned NewSPReg,
+void ARMELFStreamer::emitSetFP(unsigned NewFPReg, unsigned NewSPReg,
int64_t Offset) {
assert((NewSPReg == ARM::SP || NewSPReg == FPReg) &&
"the operand of .setfp directive should be either $sp or $fp");
@@ -453,7 +877,7 @@ void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
FPOffset += Offset;
}
-void ARMELFStreamer::EmitPad(int64_t Offset) {
+void ARMELFStreamer::emitPad(int64_t Offset) {
// Track the change of the $sp offset
SPOffset -= Offset;
@@ -462,7 +886,7 @@ void ARMELFStreamer::EmitPad(int64_t Offset) {
PendingOffset -= Offset;
}
-void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+void ARMELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool IsVector) {
// Collect the registers in the register list
unsigned Count = 0;
@@ -493,11 +917,31 @@ void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
}
namespace llvm {
+
+MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst) {
+ ARMTargetAsmStreamer *S = new ARMTargetAsmStreamer(OS, *InstPrint);
+
+ return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, InstPrint, CE, TAB,
+ ShowInst);
+}
+
MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool NoExecStack,
bool IsThumb) {
- ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ ARMTargetELFStreamer *TS = new ARMTargetELFStreamer();
+ ARMELFStreamer *S =
+ new ARMELFStreamer(Context, TS, TAB, OS, Emitter, IsThumb);
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
if (NoExecStack)
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
deleted file mode 100644
index 77ae5d2..0000000
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF streamer information for the ARM backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_ELF_STREAMER_H
-#define ARM_ELF_STREAMER_H
-
-#include "llvm/MC/MCELFStreamer.h"
-
-namespace llvm {
-
- MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack,
- bool IsThumb);
-}
-
-#endif // ARM_ELF_STREAMER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index c1aab9c..ad796e6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -49,8 +49,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
- WeakRefDirective = "\t.weak\t";
-
HasLEB128 = true;
SupportsDebugInformation = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index f0b289c..e1f716d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -15,6 +15,7 @@
#define LLVM_ARMTARGETASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
@@ -24,7 +25,7 @@ namespace llvm {
explicit ARMMCAsmInfoDarwin();
};
- class ARMELFMCAsmInfo : public MCAsmInfo {
+ class ARMELFMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit ARMELFMCAsmInfo();
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index a18d465..4382d0d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -58,8 +58,7 @@ public:
}
bool isTargetDarwin() const {
Triple TT(STI.getTargetTriple());
- Triple::OSType OS = TT.getOS();
- return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+ return TT.isOSDarwin();
}
unsigned getMachineSoImmOpValue(unsigned SoImm) const;
@@ -638,8 +637,14 @@ getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- unsigned Val =
- ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ unsigned Val = 0;
+ const MCOperand MO = MI.getOperand(OpIdx);
+
+ if(MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ else
+ Val = MO.getImm() >> 1;
+
bool I = (Val & 0x800000);
bool J1 = (Val & 0x400000);
bool J2 = (Val & 0x200000);
@@ -665,7 +670,7 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
if (MO.isExpr())
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
Fixups);
- int32_t offset = MO.getImm();
+ int64_t offset = MO.getImm();
uint32_t Val = 0x2000;
int SoImmVal;
@@ -772,8 +777,10 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
} else {
Reg = ARM::PC;
int32_t Offset = MO.getImm();
- // FIXME: Handle #-0.
- if (Offset < 0) {
+ if (Offset == INT32_MIN) {
+ Offset = 0;
+ isAdd = false;
+ } else if (Offset < 0) {
Offset *= -1;
isAdd = false;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index caa1949..a99de0e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -12,30 +12,73 @@
//===----------------------------------------------------------------------===//
#include "ARMBaseInfo.h"
-#include "ARMELFStreamer.h"
#include "ARMMCAsmInfo.h"
#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_REGINFO_MC_DESC
#include "ARMGenRegisterInfo.inc"
+static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits() & llvm::ARM::HasV7Ops &&
+ (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
+ (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) &&
+ // Checks for the deprecated CP15ISB encoding:
+ // mcr p15, #0, rX, c7, c5, #4
+ (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7)) {
+ if ((MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) {
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) {
+ Info = "deprecated since v7, use 'isb'";
+ return true;
+ }
+
+ // Checks for the deprecated CP15DSB encoding:
+ // mcr p15, #0, rX, c7, c10, #4
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10) {
+ Info = "deprecated since v7, use 'dsb'";
+ return true;
+ }
+ }
+ // Checks for the deprecated CP15DMB encoding:
+ // mcr p15, #0, rX, c7, c10, #5
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10 &&
+ (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 5)) {
+ Info = "deprecated since v7, use 'dmb'";
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits() & llvm::ARM::HasV8Ops &&
+ MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) {
+ Info = "applying IT instruction to more than one subsequent instruction is deprecated";
+ return true;
+ }
+
+ return false;
+}
+
#define GET_INSTRINFO_MC_DESC
#include "ARMGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "ARMGenSubtargetInfo.inc"
-using namespace llvm;
std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
Triple triple(TT);
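
Editor's note: the two MCR deprecation helpers added in this hunk match the coprocessor operand tuple against the pre-v7 CP15 barrier idioms. Below is a self-contained sketch of the same lookup, detached from MCInst; the function name and the tiny driver are assumptions for illustration only.

#include <cstdio>

// Deprecated CP15 barrier idioms: mcr p15, #0, rX, c7, <CRm>, <opc2>,
// matched on the coprocessor operand tuple exactly as in the hunk above.
static const char *deprecatedBarrier(unsigned Coproc, unsigned Opc1,
                                     unsigned CRn, unsigned CRm, unsigned Opc2) {
  if (Coproc != 15 || Opc1 != 0 || CRn != 7)
    return nullptr;
  if (CRm == 5 && Opc2 == 4)
    return "isb"; // CP15ISB
  if (CRm == 10 && Opc2 == 4)
    return "dsb"; // CP15DSB
  if (CRm == 10 && Opc2 == 5)
    return "dmb"; // CP15DMB
  return nullptr;
}

int main() {
  if (const char *Repl = deprecatedBarrier(15, 0, 7, 10, 5))
    std::printf("deprecated since v7, use '%s'\n", Repl);
  return 0;
}
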
@@ -60,8 +103,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
if (Idx) {
unsigned SubVer = TT[Idx];
if (SubVer == '8') {
- // FIXME: Parse v8 features
- ARMArchFeature = "+v8";
+ if (NoCPU)
+ // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP,
+ // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC
+ ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc";
+ else
+ // Use CPU to figure out the exact features
+ ARMArchFeature = "+v8";
} else if (SubVer == '7') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
isThumb = true;
@@ -106,7 +154,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
isThumb = true;
if (NoCPU)
// v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6,+noarm,+mclass";
+ ARMArchFeature = "+v6m,+noarm,+mclass";
else
ARMArchFeature = "+v6";
} else
@@ -307,6 +355,10 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer);
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(TheARMTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheThumbTarget, createMCAsmStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 4e94c53..959be8b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -18,13 +18,16 @@
#include <string>
namespace llvm {
+class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
+class MCInstPrinter;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCStreamer;
class MCRelocationInfo;
class StringRef;
class Target;
@@ -42,12 +45,19 @@ namespace ARM_MC {
StringRef FS);
}
+MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst);
+
MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
 /// createARMELFObjectWriter - Construct an ARM ELF object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index b9efe74..1f681ba 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -20,10 +20,9 @@
#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
using namespace llvm;
-using namespace llvm::object;
namespace {
class ARMMachObjectWriter : public MCMachObjectTargetWriter {
@@ -63,7 +62,7 @@ public:
static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
unsigned &Log2Size) {
- RelocType = unsigned(macho::RIT_Vanilla);
+ RelocType = unsigned(MachO::ARM_RELOC_VANILLA);
Log2Size = ~0U;
switch (Kind) {
@@ -92,21 +91,21 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_uncondbl:
case ARM::fixup_arm_condbl:
case ARM::fixup_arm_blx:
- RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ RelocType = unsigned(MachO::ARM_RELOC_BR24);
// Report as 'long', even though that is not quite accurate.
Log2Size = llvm::Log2_32(4);
return true;
// Handle Thumb branches.
case ARM::fixup_arm_thumb_br:
- RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
Log2Size = llvm::Log2_32(2);
return true;
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
- RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
Log2Size = llvm::Log2_32(4);
return true;
@@ -121,23 +120,23 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
// 1 - thumb instructions
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 1;
return true;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 3;
return true;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_arm_movw_lo16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 0;
return true;
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_movw_lo16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 2;
return true;
}
@@ -153,7 +152,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_ARM_Half;
+ unsigned Type = MachO::ARM_RELOC_HALF;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -179,7 +178,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
- Type = macho::RIT_ARM_HalfDifference;
+ Type = MachO::ARM_RELOC_HALF_SECTDIFF;
Value2 = Writer->getSymbolAddress(B_SD, Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
@@ -223,29 +222,29 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
break;
}
- if (Type == macho::RIT_ARM_HalfDifference) {
+ if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
- macho::RelocationEntry MRE;
- MRE.Word0 = ((OtherHalf << 0) |
- (macho::RIT_Pair << 24) |
- (MovtBit << 28) |
- (ThumbBit << 29) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((OtherHalf << 0) |
+ (MachO::ARM_RELOC_PAIR << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (MovtBit << 28) |
- (ThumbBit << 29) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
}
@@ -259,7 +258,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_Vanilla;
+ unsigned Type = MachO::ARM_RELOC_VANILLA;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -284,31 +283,31 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
- Type = macho::RIT_Difference;
+ Type = MachO::ARM_RELOC_SECTDIFF;
Value2 = Writer->getSymbolAddress(B_SD, Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
// Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == macho::RIT_Difference ||
- Type == macho::RIT_Generic_LocalDifference) {
- macho::RelocationEntry MRE;
- MRE.Word0 = ((0 << 0) |
- (macho::RIT_Pair << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
+ if (Type == MachO::ARM_RELOC_SECTDIFF ||
+ Type == MachO::ARM_RELOC_LOCAL_SECTDIFF) {
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((0 << 0) |
+ (MachO::ARM_RELOC_PAIR << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
}
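
Editor's note: the scattered-relocation path in this hunk packs offset, type, size, and PC-relativity into the first relocation word, with the scattered flag in the top bit. Below is a standalone sketch mirroring those shifts; the example type value assumes the ARM_RELOC_SECTDIFF encoding from the Mach-O headers.

#include <cstdint>
#include <cstdio>

// First word of a scattered Mach-O relocation, packed as in the hunk above:
// bits [0,23] offset, [24,27] type, [28,29] log2(size), [30] pc-rel, [31] scattered.
static uint32_t scatteredWord0(uint32_t FixupOffset, unsigned Type,
                               unsigned Log2Size, bool IsPCRel) {
  const uint32_t R_SCATTERED = 0x80000000u; // top bit marks a scattered entry
  return (FixupOffset << 0) | (uint32_t(Type) << 24) |
         (uint32_t(Log2Size) << 28) | (uint32_t(IsPCRel) << 30) | R_SCATTERED;
}

int main() {
  // A 4-byte, pc-relative section-difference relocation at offset 0x40
  // (ARM_RELOC_SECTDIFF is type 2 in the Mach-O headers).
  std::printf("0x%08x\n", scatteredWord0(0x40, 2, 2, true));
  return 0;
}
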
@@ -326,13 +325,13 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
switch (RelocType) {
default:
return false;
- case macho::RIT_ARM_Branch24Bit:
+ case MachO::ARM_RELOC_BR24:
// PC pre-adjustment of 8 for these instructions.
Value -= 8;
// ARM BL/BLX has a 25-bit offset.
Range = 0x1ffffff;
break;
- case macho::RIT_ARM_ThumbBranch22Bit:
+ case MachO::ARM_THUMB_RELOC_BR22:
// PC pre-adjustment of 4 for these instructions.
Value -= 4;
// Thumb BL/BLX has a 24-bit offset.
@@ -361,7 +360,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
- unsigned RelocType = macho::RIT_Vanilla;
+ unsigned RelocType = MachO::ARM_RELOC_VANILLA;
if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
// If we failed to get fixup kind info, it's because there's no legal
// relocation type for the fixup kind. This happens when it's a fixup that's
@@ -374,7 +373,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half)
+ if (RelocType == MachO::ARM_RELOC_HALF)
return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
@@ -392,7 +391,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
//
// Is this right for ARM?
uint32_t Offset = Target.getConstant();
- if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA)
Offset += 1 << Log2Size;
if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
@@ -445,17 +444,17 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
- if (Type == macho::RIT_ARM_Half) {
+ if (Type == MachO::ARM_RELOC_HALF) {
// The other-half value only gets populated for the movt and movw
// relocation entries.
uint32_t Value = 0;
@@ -474,11 +473,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
Value = FixedValue & 0xffff;
break;
}
- macho::RelocationEntry MREPair;
- MREPair.Word0 = Value;
- MREPair.Word1 = ((0xffffff) |
- (Log2Size << 25) |
- (macho::RIT_Pair << 28));
+ MachO::any_relocation_info MREPair;
+ MREPair.r_word0 = Value;
+ MREPair.r_word1 = ((0xffffff << 0) |
+ (Log2Size << 25) |
+ (MachO::ARM_RELOC_PAIR << 28));
Writer->addRelocation(Fragment->getParent(), MREPair);
}
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index db49db8..cfb33f5 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -127,7 +127,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
GPRCS1Size += 4;
break;
case ARM::R8:
@@ -136,16 +135,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetIOS()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
+ if (STI.isTargetIOS())
GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
+ else
GPRCS1Size += 4;
- }
break;
default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
DPRCSSize += 8;
}
}
@@ -169,10 +164,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
NumBytes = DPRCSOffset;
+ int FramePtrOffsetInBlock = 0;
+ if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) {
+ FramePtrOffsetInBlock = NumBytes;
+ NumBytes = 0;
+ }
+
   // Adjust FP so it points to the stack slot that contains the previous FP.
if (HasFP) {
+ FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
if (NumBytes > 508)
// If offset is > 508 then sp cannot be adjusted in a single instruction,
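
Editor's note: the rewritten FP setup above switches from a frame-index operand to an explicit add from SP, and tADDrSPi encodes its immediate in words, hence the division by 4. A quick standalone illustration with made-up offsets:

#include <cassert>
#include <cstdio>

int main() {
  // Made-up byte offset from the adjusted SP down to the saved-FP stack slot:
  // the folded SP adjustment plus the object offset and GPR save area above it.
  int FramePtrOffsetInBlock = 8 + 4;
  assert(FramePtrOffsetInBlock % 4 == 0 && "tADDrSPi encodes its immediate in words");
  std::printf("add fp, sp, #%d  ; encoded immediate field = %d\n",
              FramePtrOffsetInBlock, FramePtrOffsetInBlock / 4);
  return 0;
}
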
@@ -213,13 +215,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
@@ -296,8 +291,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
&MBB.front() != MBBI &&
prior(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = prior(MBBI);
- emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
- } else
+ if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes))
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
}
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6722614..65a7221 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -426,7 +426,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
*this);
} else {
// Translate r0 = add sp, -imm to
- // r0 = -imm (this is then translated into a series of instructons)
+ // r0 = -imm (this is then translated into a series of instructions)
// r0 = add r0, sp
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
@@ -573,11 +573,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MF.getFrameInfo()->getStackSize() + SPAdj;
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
"Unexpected");
// There are alloca()'s in this function, must reference off the frame
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d8596d7..0b7d3bb 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -28,6 +28,7 @@ namespace {
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+ bool restrictIT;
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
@@ -192,37 +193,42 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Form IT block.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
- // Branches, including tricky ones like LDM_RET, need to end an IT
- // block so check the instruction we just put in the block.
- for (; MBBI != E && Pos &&
- (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
- if (MBBI->isDebugValue())
- continue;
-
- MachineInstr *NMI = &*MBBI;
- MI = NMI;
-
- unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
- if (NCC == CC || NCC == OCC) {
- Mask |= (NCC & 1) << Pos;
- // Add implicit use of ITSTATE.
- NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
- true/*isImp*/, false/*isKill*/));
- LastITMI = NMI;
- } else {
- if (NCC == ARMCC::AL &&
- MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
- --MBBI;
- MBB.remove(NMI);
- MBB.insert(InsertPos, NMI);
- ++NumMovedInsts;
+
+ // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
+ // is set: skip the loop
+ if (!restrictIT) {
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
}
- break;
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
}
- TrackDefUses(NMI, Defs, Uses, TRI);
- --Pos;
}
// Finalize IT mask.
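
Editor's note: the loop kept inside the new restrictIT guard accumulates the mask from bit 3 downward, one bit per following instruction, taken from the low bit of that instruction's condition code. Below is a standalone sketch of just that accumulation, leaving out the finalization the pass performs afterwards (the inputs are invented).

#include <cstdio>

int main() {
  // Instructions following the first one in the block: 't' shares the block's
  // condition, 'e' takes the opposite (condition codes differ in their low bit).
  const char Following[] = {'t', 'e', 't'};
  unsigned CCLowBit = 0; // low bit of the block condition's encoding (made up)
  unsigned Mask = 0, Pos = 3;

  for (unsigned i = 0; i != sizeof(Following); ++i) {
    unsigned NCCLowBit = (Following[i] == 't') ? CCLowBit : (CCLowBit ^ 1);
    Mask |= (NCCLowBit & 1) << Pos; // same shift as in the pass
    --Pos;
  }
  std::printf("partial IT mask = 0x%x (finalization not shown)\n", Mask);
  return 0;
}
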
@@ -250,6 +256,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
TRI = TM.getRegisterInfo();
+ restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
if (!AFI->isThumbFunction())
return false;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 286eaa0..91788ac 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -36,7 +36,8 @@ Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
- NopInst.setOpcode(ARM::tNOP);
+ NopInst.setOpcode(ARM::tHINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
NopInst.addOperand(MCOperand::CreateReg(0));
}
@@ -214,6 +215,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ return;
+ }
+
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
@@ -334,6 +342,7 @@ negativeOffsetOpcode(unsigned opcode)
case ARM::t2STRi12: return ARM::t2STRi8;
case ARM::t2STRBi12: return ARM::t2STRBi8;
case ARM::t2STRHi12: return ARM::t2STRHi8;
+ case ARM::t2PLDi12: return ARM::t2PLDi8;
case ARM::t2LDRi8:
case ARM::t2LDRHi8:
@@ -343,6 +352,7 @@ negativeOffsetOpcode(unsigned opcode)
case ARM::t2STRi8:
case ARM::t2STRBi8:
case ARM::t2STRHi8:
+ case ARM::t2PLDi8:
return opcode;
default:
@@ -364,6 +374,7 @@ positiveOffsetOpcode(unsigned opcode)
case ARM::t2STRi8: return ARM::t2STRi12;
case ARM::t2STRBi8: return ARM::t2STRBi12;
case ARM::t2STRHi8: return ARM::t2STRHi12;
+ case ARM::t2PLDi8: return ARM::t2PLDi12;
case ARM::t2LDRi12:
case ARM::t2LDRHi12:
@@ -373,6 +384,7 @@ positiveOffsetOpcode(unsigned opcode)
case ARM::t2STRi12:
case ARM::t2STRBi12:
case ARM::t2STRHi12:
+ case ARM::t2PLDi12:
return opcode;
default:
@@ -394,6 +406,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRs: return ARM::t2STRi12;
case ARM::t2STRBs: return ARM::t2STRBi12;
case ARM::t2STRHs: return ARM::t2STRHi12;
+ case ARM::t2PLDs: return ARM::t2PLDi12;
case ARM::t2LDRi12:
case ARM::t2LDRHi12:
@@ -403,6 +416,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRi12:
case ARM::t2STRBi12:
case ARM::t2STRHi12:
+ case ARM::t2PLDi12:
case ARM::t2LDRi8:
case ARM::t2LDRHi8:
case ARM::t2LDRBi8:
@@ -411,6 +425,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRi8:
case ARM::t2STRBi8:
case ARM::t2STRHi8:
+ case ARM::t2PLDi8:
return opcode;
default:
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 0ddcad2..ddc7a66 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
+#include <cctype>
#include <cstdio>
#include <map>
#include <set>
@@ -291,8 +292,6 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
Out << "GlobalValue::LinkOnceAnyLinkage "; break;
case GlobalValue::LinkOnceODRLinkage:
Out << "GlobalValue::LinkOnceODRLinkage "; break;
- case GlobalValue::LinkOnceODRAutoHideLinkage:
- Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break;
case GlobalValue::WeakAnyLinkage:
Out << "GlobalValue::WeakAnyLinkage"; break;
case GlobalValue::WeakODRLinkage:
@@ -497,6 +496,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
HANDLE_ATTR(ReadOnly);
HANDLE_ATTR(NoInline);
HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeNone);
HANDLE_ATTR(OptimizeForSize);
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
@@ -1139,7 +1139,7 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
- const IntegersSubset CaseVal = i.getCaseValueEx();
+ const ConstantInt* CaseVal = i.getCaseValue();
const BasicBlock *BB = i.getCaseSuccessor();
Out << iName << "->addCase("
<< getOpName(CaseVal) << ", "
@@ -1160,8 +1160,7 @@ void CppWriter::printInstruction(const Instruction *I,
break;
}
case Instruction::Resume: {
- Out << "ResumeInst::Create(mod->getContext(), " << opNames[0]
- << ", " << bbname << ");";
+ Out << "ResumeInst::Create(" << opNames[0] << ", " << bbname << ");";
break;
}
case Instruction::Invoke: {
@@ -1175,7 +1174,7 @@ void CppWriter::printInstruction(const Instruction *I,
}
// FIXME: This shouldn't use magic numbers -3, -2, and -1.
Out << "InvokeInst *" << iName << " = InvokeInst::Create("
- << getOpName(inv->getCalledFunction()) << ", "
+ << getOpName(inv->getCalledValue()) << ", "
<< getOpName(inv->getNormalDest()) << ", "
<< getOpName(inv->getUnwindDest()) << ", "
<< iName << "_params, \"";
@@ -1589,6 +1588,20 @@ void CppWriter::printInstruction(const Instruction *I,
Out << "\");";
break;
}
+ case Instruction::LandingPad: {
+ const LandingPadInst *lpi = cast<LandingPadInst>(I);
+ Out << "LandingPadInst* " << iName << " = LandingPadInst::Create(";
+ printCppName(lpi->getType());
+ Out << ", " << opNames[0] << ", " << lpi->getNumClauses() << ", \"";
+ printEscapedString(lpi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCleanup("
+ << (lpi->isCleanup() ? "true" : "false")
+ << ");";
+ for (unsigned i = 0, e = lpi->getNumClauses(); i != e; ++i)
+ nl(Out) << iName << "->addClause(" << opNames[i+1] << ");";
+ break;
+ }
}
DefinedValues.insert(I);
nl(Out);
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 2b79791..ae3c9eb 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_target(HexagonCodeGen
HexagonFrameLowering.cpp
HexagonHardwareLoops.cpp
HexagonFixupHwLoops.cpp
+ HexagonMachineFunctionInfo.cpp
HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp
HexagonInstrInfo.cpp
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 88cd3fb..a2e04ba 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -99,7 +99,7 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
return;
case MachineOperand::MO_GlobalAddress:
// Computing the address of a global symbol, not calling it.
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
printOffset(MO.getOffset(), O);
return;
}
@@ -267,7 +267,7 @@ void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
assert( (MO.getType() == MachineOperand::MO_GlobalAddress) &&
"Expecting global address");
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
if (MO.getOffset() != 0) {
O << " + ";
O << MO.getOffset();
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 3c4ca0f..52d5ab2 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -871,7 +871,7 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(
/// \brief - Return true if the loop contains an instruction that inhibits
/// the use of the hardware loop function.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
- const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *MBB = Blocks[i];
for (MachineBasicBlock::iterator
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 9e78e51..5ae9328 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1344,8 +1344,10 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
switch (N->getOpcode()) {
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 567faca..1374179 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -39,13 +39,24 @@
using namespace llvm;
-const unsigned Hexagon_MAX_RET_SIZE = 64;
-
static cl::opt<bool>
EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
cl::desc("Control jump table emission on Hexagon target"));
-int NumNamedVarArgParams = -1;
+namespace {
+class HexagonCCState : public CCState {
+ int NumNamedVarArgParams;
+
+public:
+ HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
+ LLVMContext &C, int NumNamedVarArgParams)
+ : CCState(CC, isVarArg, MF, TM, locs, C),
+ NumNamedVarArgParams(NumNamedVarArgParams) {}
+
+ int getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
+};
+}
// Implement calling convention for Hexagon.
static bool
@@ -82,12 +93,13 @@ static bool
CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ HexagonCCState &HState = static_cast<HexagonCCState &>(State);
// NumNamedVarArgParams can not be zero for a VarArg function.
- assert ( (NumNamedVarArgParams > 0) &&
- "NumNamedVarArgParams is not bigger than zero.");
+ assert((HState.getNumNamedVarArgParams() > 0) &&
+ "NumNamedVarArgParams is not bigger than zero.");
- if ( (int)ValNo < NumNamedVarArgParams ) {
+ if ((int)ValNo < HState.getNumNamedVarArgParams()) {
// Deal with named arguments.
return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
}
@@ -394,13 +406,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
-
// Check for varargs.
- NumNamedVarArgParams = -1;
+ int NumNamedVarArgParams = -1;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
{
const Function* CalleeFn = NULL;
@@ -417,6 +424,12 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(),
+ NumNamedVarArgParams);
+
if (NumNamedVarArgParams > 0)
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
else
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 4fe0107..73da226 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -141,8 +141,11 @@ namespace llvm {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- virtual EVT getSetCCResultType(LLVMContext &, EVT) const {
- return MVT::i1;
+ virtual EVT getSetCCResultType(LLVMContext &C, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i1;
+ else
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
}
virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
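
Editor's note: the change above makes vector compares produce an i1 vector with the same lane count instead of a bare i1. A toy stand-in for that rule, with types modelled as a small struct rather than LLVM's EVT:

#include <cstdio>
#include <string>

// Scalar compares yield i1; vector compares yield a vector of i1 with the
// same number of lanes, mirroring the hunk above.
struct ToyVT {
  unsigned NumElts; // 0 means scalar
  bool isVector() const { return NumElts != 0; }
};

static std::string setCCResultType(ToyVT VT) {
  if (!VT.isVector())
    return "i1";
  return "v" + std::to_string(VT.NumElts) + "i1";
}

int main() {
  ToyVT Scalar = {0}, Vec4 = {4};
  std::printf("%s %s\n", setCCResultType(Scalar).c_str(),
              setCCResultType(Vec4).c_str()); // prints: i1 v4i1
  return 0;
}
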
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index e71386a..d25bfa8 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -63,7 +63,7 @@ class MemAccessSize<bits<3> value> {
def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
-def WordAccess : MemAccessSize<3>;// Word access instrution (memw).
+def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 5af645c..6b97609 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -26,7 +26,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "HexagonGenInstrInfo.inc"
#include "HexagonGenDFAPacketizer.inc"
@@ -55,6 +55,8 @@ const int Hexagon_MEMH_AUTOINC_MIN = -16;
const int Hexagon_MEMB_AUTOINC_MAX = 7;
const int Hexagon_MEMB_AUTOINC_MIN = -8;
+// Pin the vtable to this file.
+void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
: HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 3c28df4..3f45b8b 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -26,6 +26,7 @@
namespace llvm {
class HexagonInstrInfo : public HexagonGenInstrInfo {
+ virtual void anchor();
const HexagonRegisterInfo RI;
const HexagonSubtarget &Subtarget;
typedef unsigned Opcode_t;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index fee83fb..475c23d 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -3390,4 +3390,3 @@ def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
(i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
-
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index f011d51..bbb2fa4 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -73,7 +73,7 @@ void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
AP.OutContext));
break;
case MachineOperand::MO_GlobalAddress:
- MCO = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
+ MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP);
break;
case MachineOperand::MO_ExternalSymbol:
MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()),
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
new file mode 100644
index 0000000..9579c8b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
@@ -0,0 +1,16 @@
+//= HexagonMachineFunctionInfo.cpp - Hexagon machine function info *- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMachineFunctionInfo.h"
+
+using namespace llvm;
+
+// pin vtable to this file
+void HexagonMachineFunctionInfo::anchor() {}
+
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index bd7b26a..a59c8c9 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//=- HexagonMachineFuctionInfo.h - Hexagon machine function info --*- C++ -*-=//
+//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -10,6 +10,7 @@
#ifndef HexagonMACHINEFUNCTIONINFO_H
#define HexagonMACHINEFUNCTIONINFO_H
+#include <map>
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -30,9 +31,8 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
int VarArgsFrameIndex;
bool HasClobberLR;
bool HasEHReturn;
-
std::map<const MachineInstr*, unsigned> PacketInfo;
-
+ virtual void anchor();
public:
HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0),
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 10bb3e9..c94f081 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -407,11 +407,11 @@ SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
const ReadyQueue &Q,
- SUnit *SU, PressureElement P) {
+ SUnit *SU, PressureChange P) {
dbgs() << Label << " " << Q.getName() << " ";
if (P.isValid())
- dbgs() << DAG->TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
- << " ";
+ dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
+ << P.getUnitInc() << " ";
else
dbgs() << " ";
SU->dump(DAG);
@@ -457,9 +457,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
-static const unsigned PriorityTwo = 100;
-static const unsigned PriorityThree = 50;
-static const unsigned PriorityFour = 20;
+static const unsigned PriorityTwo = 50;
static const unsigned ScaleTwo = 10;
static const unsigned FactorOne = 2;
@@ -517,8 +515,8 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
ResCount += (NumNodesBlocking * ScaleTwo);
// Factor in reg pressure as a heuristic.
- ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
- ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
+ ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo);
+ ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo);
DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 171193e..8ac333f 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -233,7 +233,7 @@ protected:
SchedCandidate &Candidate);
#ifndef NDEBUG
void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
- PressureElement P = PressureElement());
+ PressureChange P = PressureChange());
#endif
};
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 89e3406..5490ecd 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -29,7 +29,7 @@
//
// Note: The peephole pass makes the instrucstions like
// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill>
-// redundant and relies on some form of dead removal instrucions, like
+// redundant and relies on some form of dead removal instructions, like
// DCE or DIE to actually eliminate them.
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index d5ca4d7..1786e9d 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -295,13 +295,5 @@ unsigned HexagonRegisterInfo::getStackRegister() const {
return Hexagon::R29;
}
-unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned HexagonRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
-
#define GET_REGINFO_TARGET_DESC
#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index fec86df..89af7c3 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -78,10 +78,6 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getFrameRegister() const;
unsigned getStackRegister() const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 3bf2f20..5166f8e 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -16,33 +16,31 @@
// scheduled after register allocation.
//
//===----------------------------------------------------------------------===//
+
#define DEBUG_TYPE "xfer"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
-#include "HexagonMachineFunctionInfo.h"
#include <map>
-#include <iostream>
-
-#include "llvm/Support/CommandLine.h"
-#define DEBUG_TYPE "xfer"
-
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 07d5ce1..fca6707 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -86,3 +86,5 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
ModeIEEERndNear = false;
}
+// Pin the vtable to this file.
+void HexagonSubtarget::anchor() {}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 76a8fba..690bef0 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -27,7 +27,7 @@
namespace llvm {
class HexagonSubtarget : public HexagonGenSubtargetInfo {
-
+ virtual void anchor();
bool UseMemOps;
bool ModeIEEERndNear;
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index cd96b58..bb950a0 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -102,17 +102,25 @@ class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
- // Enable MI scheduler.
- if (!DisableHexagonMISched) {
+ // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define
+ // HexagonSubtarget::enableMachineScheduler() { return true; }.
+ // That will bypass the SelectionDAG VLIW scheduler, which is probably just
+ // hurting compile time and will be removed eventually anyway.
+ if (DisableHexagonMISched)
+ disablePass(&MachineSchedulerID);
+ else
enablePass(&MachineSchedulerID);
- MachineSchedRegistry::setDefault(createVLIWMachineSched);
- }
}
HexagonTargetMachine &getHexagonTargetMachine() const {
return getTM<HexagonTargetMachine>();
}
+ virtual ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const {
+ return createVLIWMachineSched(C);
+ }
+
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
virtual bool addPostRegAlloc();
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index 2ea0d2e..7c41507 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -21,7 +21,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
using namespace llvm;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index e0f5a27..8519cf3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -73,7 +73,7 @@ namespace HexagonII {
     NoMemAccess = 0,            // Not a memory access instruction.
ByteAccess = 1, // Byte access instruction (memb).
HalfWordAccess = 2, // Half word access instruction (memh).
- WordAccess = 3, // Word access instrution (memw).
+ WordAccess = 3, // Word access instruction (memw).
DoubleWordAccess = 4 // Double word access instruction (memd)
};
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 495dbb9..3f9415b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -15,6 +15,9 @@
using namespace llvm;
+// Pin the vtable to this file.
+void HexagonMCAsmInfo::anchor() {}
+
HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
@@ -29,7 +32,6 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) {
InlineAsmEnd = "# InlineAsm End";
ZeroDirective = "\t.space\t";
AscizDirective = "\t.string\t";
- WeakRefDirective = "\t.weak\t";
SupportsDebugInformation = true;
UsesELFSectionDirectiveForBSS = true;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index 0b94d21..bd8cb76 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -15,10 +15,11 @@
#define HexagonMCASMINFO_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class HexagonMCAsmInfo : public MCAsmInfo {
+ class HexagonMCAsmInfo : public MCAsmInfoELF {
+ virtual void anchor();
public:
explicit HexagonMCAsmInfo(StringRef TT);
};
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index d213a45..acf2ab8 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -21,11 +21,8 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(StringRef TT) {
PointerSize = CalleeSaveStackSlotSize = 2;
PrivateGlobalPrefix = ".L";
- WeakRefDirective ="\t.weak\t";
- PCSymbol=".";
CommentString = ";";
AlignmentIsInBytes = false;
- AllowNameToStartWithDigit = true;
UsesELFSectionDirectiveForBSS = true;
}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index feb040d..a7e0e58 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -14,12 +14,12 @@
#ifndef MSP430TARGETASMINFO_H
#define MSP430TARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
- class MSP430MCAsmInfo : public MCAsmInfo {
+ class MSP430MCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit MSP430MCAsmInfo(StringRef TT);
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 0a04e5d..18311c3 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -92,7 +92,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
if (Offset)
O << '(' << Offset << '+';
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
if (Offset)
O << ')';
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index b448cc4..8a69d1e 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -23,18 +23,15 @@ def RetCC_MSP430 : CallingConv<[
//===----------------------------------------------------------------------===//
// MSP430 Argument Calling Conventions
//===----------------------------------------------------------------------===//
-def CC_MSP430 : CallingConv<[
+def CC_MSP430_AssignStack : CallingConv<[
// Pass by value if the byval attribute is given
CCIfByVal<CCPassByVal<2, 2>>,
// Promote i8 arguments to i16.
CCIfType<[i8], CCPromoteToType<i16>>,
- // The first 4 integer arguments of non-varargs functions are passed in
- // integer registers.
- CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
-
// Integer values get stored in stack slots that are 2 bytes in
// size and 2-byte aligned.
CCIfType<[i16], CCAssignToStack<2, 2>>
]>;
+
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index c673f59..8370714 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -27,8 +27,8 @@ protected:
public:
explicit MSP430FrameLowering(const MSP430Subtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
- }
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2),
+ STI(sti) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 543f54c..4152829 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -395,6 +395,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
DEBUG(errs() << "== ";
Node->dump(CurDAG);
errs() << "\n");
+ Node->setNodeId(-1);
return NULL;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 803e899..745cdf5 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -45,7 +45,7 @@ typedef enum {
} HWMultUseMode;
static cl::opt<HWMultUseMode>
-HWMultMode("msp430-hwmult-mode",
+HWMultMode("msp430-hwmult-mode", cl::Hidden,
cl::desc("Hardware multiplier use mode"),
cl::init(HWMultNoIntr),
cl::values(
@@ -250,6 +250,123 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
#include "MSP430GenCallingConv.inc"
+/// For each argument in a function, store the number of pieces it is
+/// composed of.
+template<typename ArgT>
+static void ParseFunctionArgs(const SmallVectorImpl<ArgT> &Args,
+ SmallVectorImpl<unsigned> &Out) {
+ unsigned CurrentArgIndex = ~0U;
+ for (unsigned i = 0, e = Args.size(); i != e; i++) {
+ if (CurrentArgIndex == Args[i].OrigArgIndex) {
+ Out.back()++;
+ } else {
+ Out.push_back(1);
+ CurrentArgIndex++;
+ }
+ }
+}
+
+static void AnalyzeVarArgs(CCState &State,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ State.AnalyzeCallOperands(Outs, CC_MSP430_AssignStack);
+}
+
+static void AnalyzeVarArgs(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) {
+ State.AnalyzeFormalArguments(Ins, CC_MSP430_AssignStack);
+}
+
+/// Analyze incoming and outgoing function arguments. We need custom C++ code
+/// to handle special constraints in the ABI like reversing the order of the
+/// pieces of split arguments. In addition, all pieces of a certain argument
+/// have to be passed either using registers or the stack but never mixing both.
+template<typename ArgT>
+static void AnalyzeArguments(CCState &State,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<ArgT> &Args) {
+ static const uint16_t RegList[] = {
+ MSP430::R15W, MSP430::R14W, MSP430::R13W, MSP430::R12W
+ };
+ static const unsigned NbRegs = array_lengthof(RegList);
+
+ if (State.isVarArg()) {
+ AnalyzeVarArgs(State, Args);
+ return;
+ }
+
+ SmallVector<unsigned, 4> ArgsParts;
+ ParseFunctionArgs(Args, ArgsParts);
+
+ unsigned RegsLeft = NbRegs;
+ bool UseStack = false;
+ unsigned ValNo = 0;
+
+ for (unsigned i = 0, e = ArgsParts.size(); i != e; i++) {
+ MVT ArgVT = Args[ValNo].VT;
+ ISD::ArgFlagsTy ArgFlags = Args[ValNo].Flags;
+ MVT LocVT = ArgVT;
+ CCValAssign::LocInfo LocInfo = CCValAssign::Full;
+
+ // Promote i8 to i16
+ if (LocVT == MVT::i8) {
+ LocVT = MVT::i16;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ // Handle byval arguments
+ if (ArgFlags.isByVal()) {
+ State.HandleByVal(ValNo++, ArgVT, LocVT, LocInfo, 2, 2, ArgFlags);
+ continue;
+ }
+
+ unsigned Parts = ArgsParts[i];
+
+ if (!UseStack && Parts <= RegsLeft) {
+ unsigned FirstVal = ValNo;
+ for (unsigned j = 0; j < Parts; j++) {
+ unsigned Reg = State.AllocateReg(RegList, NbRegs);
+ State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo));
+ RegsLeft--;
+ }
+
+ // Reverse the order of the pieces to agree with the "big endian" format
+ // required in the calling convention ABI.
+ SmallVectorImpl<CCValAssign>::iterator B = ArgLocs.begin() + FirstVal;
+ std::reverse(B, B + Parts);
+ } else {
+ UseStack = true;
+ for (unsigned j = 0; j < Parts; j++)
+ CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State);
+ }
+ }
+}
+
+static void AnalyzeRetResult(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) {
+ State.AnalyzeCallResult(Ins, RetCC_MSP430);
+}
+
+static void AnalyzeRetResult(CCState &State,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ State.AnalyzeReturn(Outs, RetCC_MSP430);
+}
+
+template<typename ArgT>
+static void AnalyzeReturnValues(CCState &State,
+ SmallVectorImpl<CCValAssign> &RVLocs,
+ const SmallVectorImpl<ArgT> &Args) {
+ AnalyzeRetResult(State, Args);
+
+ // Reverse split return values to get the "big endian" format required
+ // to agree with the calling convention ABI.
+ std::reverse(RVLocs.begin(), RVLocs.end());
+}
+
SDValue
MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -325,7 +442,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
+ AnalyzeArguments(CCInfo, ArgLocs, Ins);
// Create frame index for the start of the first vararg value
if (isVarArg) {
@@ -423,7 +540,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
getTargetMachine(), RVLocs, *DAG.getContext());
// Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
+ AnalyzeReturnValues(CCInfo, RVLocs, Outs);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -471,8 +588,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);
+ AnalyzeArguments(CCInfo, ArgLocs, Outs);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -610,7 +726,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430);
+ AnalyzeReturnValues(CCInfo, RVLocs, Ins);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
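A minimal sketch of the assignment strategy used by the AnalyzeArguments helper added above (hypothetical names; not code from this change): all pieces of one argument go to registers only when they all fit, the register-assigned pieces are then reversed, and otherwise every piece is pushed onto 2-byte stack slots.

#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

// One location per argument piece: a register name or a 2-byte stack slot.
struct PieceLoc {
  bool OnStack;
  std::string Reg;      // valid when !OnStack
  unsigned StackOffset; // valid when OnStack
};

// Hypothetical sketch of the strategy in AnalyzeArguments: the pieces of one
// argument are assigned to registers only if they all fit, and are then
// reversed; otherwise every piece goes to the stack. (The real code also
// stops using registers for all later arguments once one argument has
// spilled to the stack.)
static void assignPieces(unsigned Parts, const std::vector<std::string> &Regs,
                         std::size_t &NextReg, unsigned &NextStackOffset,
                         std::vector<PieceLoc> &Locs) {
  if (Parts <= Regs.size() - NextReg) {
    std::size_t First = Locs.size();
    for (unsigned i = 0; i < Parts; ++i)
      Locs.push_back({false, Regs[NextReg++], 0});
    // Reverse the pieces of this argument, as the MSP430 lowering does.
    std::reverse(Locs.begin() + First, Locs.end());
  } else {
    for (unsigned i = 0; i < Parts; ++i) {
      Locs.push_back({true, "", NextStackOffset});
      NextStackOffset += 2; // 2-byte, 2-byte-aligned slots.
    }
  }
}

With Regs = {"R15W", "R14W", "R13W", "R12W"}, a two-piece argument is recorded as R14W then R15W in this sketch, i.e. the pieces end up in reversed order.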
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index c850594..7a0b00a 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -22,11 +22,14 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "MSP430GenInstrInfo.inc"
using namespace llvm;
+// Pin the vtable to this file.
+void MSP430InstrInfo::anchor() {}
+
MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
: MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
RI(tm) {}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index d79f992..ad2b8cc 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -42,6 +42,7 @@ namespace MSP430II {
class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
+ virtual void anchor();
public:
explicit MSP430InstrInfo(MSP430TargetMachine &TM);
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 043e5be..52f9ee5 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -33,7 +33,7 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const {
case 0: break;
}
- return Printer.Mang->getSymbol(MO.getGlobal());
+ return Printer.getSymbol(MO.getGlobal());
}
MCSymbol *MSP430MCInstLower::
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index f86428c..38be25c 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -23,84 +23,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
- if ((C < 'a' || C > 'z') &&
- (C < 'A' || C > 'Z') &&
- (C < '0' || C > '9') &&
- C != '_' && C != '$' && C != '@' &&
- !(AllowPeriod && C == '.') &&
- !(AllowUTF8 && (C & 0x80)))
- return false;
- return true;
-}
-
-static char HexDigit(int V) {
- return V < 10 ? V+'0' : V+'A'-10;
-}
-
-static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) {
- OutName.push_back('_');
- OutName.push_back(HexDigit(C >> 4));
- OutName.push_back(HexDigit(C & 15));
- OutName.push_back('_');
-}
-
-/// NameNeedsEscaping - Return true if the identifier \p Str needs quotes
-/// for this assembler.
-static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo *MAI) {
- assert(!Str.empty() && "Cannot create an empty MCSymbol");
-
- // If the first character is a number and the target does not allow this, we
- // need quotes.
- if (!MAI->doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9')
- return true;
-
- // If any of the characters in the string is an unacceptable character, force
- // quotes.
- bool AllowPeriod = MAI->doesAllowPeriodsInName();
- bool AllowUTF8 = MAI->doesAllowUTF8();
- for (unsigned i = 0, e = Str.size(); i != e; ++i)
- if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
- return true;
- return false;
-}
-
-/// appendMangledName - Add the specified string in mangled form if it uses
-/// any unusual characters.
-static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
- const MCAsmInfo *MAI) {
- // The first character is not allowed to be a number unless the target
- // explicitly allows it.
- if (!MAI->doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
- MangleLetter(OutName, Str[0]);
- Str = Str.substr(1);
- }
-
- bool AllowPeriod = MAI->doesAllowPeriodsInName();
- bool AllowUTF8 = MAI->doesAllowUTF8();
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
- MangleLetter(OutName, Str[i]);
- else
- OutName.push_back(Str[i]);
- }
-}
-
-
-/// appendMangledQuotedName - On systems that support quoted symbols, we still
-/// have to escape some (obscure) characters like " and \n which would break the
-/// assembler's lexing.
-static void appendMangledQuotedName(SmallVectorImpl<char> &OutName,
- StringRef Str) {
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (Str[i] == '"' || Str[i] == '\n')
- MangleLetter(OutName, Str[i]);
- else
- OutName.push_back(Str[i]);
- }
-}
-
-
/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
/// and the specified name as the global variable name. GVName must not be
/// empty.
@@ -111,7 +33,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
StringRef Name = GVName.toStringRef(TmpData);
assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
- const MCAsmInfo *MAI = Context.getAsmInfo();
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
// If the global name is not led with \1, add the appropriate prefixes.
if (Name[0] == '\1') {
@@ -136,26 +58,9 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
OutName.append(Prefix, Prefix+strlen(Prefix));
}
}
-
+
// If this is a simple string that doesn't need escaping, just append it.
- if (!NameNeedsEscaping(Name, MAI) ||
- // If quotes are supported, they can be used unless the string contains
- // a quote or newline.
- (MAI->doesAllowQuotesInName() &&
- Name.find_first_of("\n\"") == StringRef::npos)) {
- OutName.append(Name.begin(), Name.end());
- return;
- }
-
- // On systems that do not allow quoted names, we need to mangle most
- // strange characters.
- if (!MAI->doesAllowQuotesInName())
- return appendMangledName(OutName, Name, MAI);
-
- // Okay, the system allows quoted strings. We can quote most anything, the
- // only characters that need escaping are " and \n.
- assert(Name.find_first_of("\n\"") != StringRef::npos);
- return appendMangledQuotedName(OutName, Name);
+ OutName.append(Name.begin(), Name.end());
}
/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
@@ -212,7 +117,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
// add it.
- if (Context.getAsmInfo()->hasMicrosoftFastStdCallMangling()) {
+ if (TM->getMCAsmInfo()->hasMicrosoftFastStdCallMangling()) {
if (const Function *F = dyn_cast<Function>(GV)) {
CallingConv::ID CC = F->getCallingConv();
@@ -236,13 +141,3 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
}
}
}
-
-/// getSymbol - Return the MCSymbol for the specified global value. This
-/// symbol is the main label that is the address of the global.
-MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
- SmallString<60> NameStr;
- getNameWithPrefix(NameStr, GV, false);
- return Context.GetOrCreateSymbol(NameStr.str());
-}
-
-
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 3dd6562..cdae6c2 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsRegisterInfo.h"
+#include "MipsTargetStreamer.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -24,23 +25,25 @@
using namespace llvm;
+namespace llvm {
+class MCInstrInfo;
+}
+
namespace {
class MipsAssemblerOptions {
public:
- MipsAssemblerOptions():
- aTReg(1), reorder(true), macro(true) {
- }
+ MipsAssemblerOptions() : aTReg(1), reorder(true), macro(true) {}
- unsigned getATRegNum() {return aTReg;}
+ unsigned getATRegNum() { return aTReg; }
bool setATReg(unsigned Reg);
- bool isReorder() {return reorder;}
- void setReorder() {reorder = true;}
- void setNoreorder() {reorder = false;}
+ bool isReorder() { return reorder; }
+ void setReorder() { reorder = true; }
+ void setNoreorder() { reorder = false; }
- bool isMacro() {return macro;}
- void setMacro() {macro = true;}
- void setNomacro() {macro = false;}
+ bool isMacro() { return macro; }
+ void setMacro() { macro = true; }
+ void setNomacro() { macro = false; }
private:
unsigned aTReg;
@@ -52,23 +55,21 @@ private:
namespace {
class MipsAsmParser : public MCTargetAsmParser {
- enum FpFormatTy {
- FP_FORMAT_NONE = -1,
- FP_FORMAT_S,
- FP_FORMAT_D,
- FP_FORMAT_L,
- FP_FORMAT_W
- } FpFormat;
+ MipsTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = Parser.getStreamer().getTargetStreamer();
+ return static_cast<MipsTargetStreamer &>(TS);
+ }
MCSubtargetInfo &STI;
MCAsmParser &Parser;
MipsAssemblerOptions Options;
+ bool hasConsumedDollar;
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm);
@@ -76,56 +77,98 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands);
bool ParseDirective(AsmToken DirectiveID);
MipsAsmParser::OperandMatchResultTy
- parseRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- int RegKind);
+ parseRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands, int RegKind);
MipsAsmParser::OperandMatchResultTy
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseMSARegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands, int RegKind);
MipsAsmParser::OperandMatchResultTy
- parseGPR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseMSACtrlRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind);
MipsAsmParser::OperandMatchResultTy
- parseGPR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ bool parsePtrReg(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind);
MipsAsmParser::OperandMatchResultTy
- parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parsePtrReg(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseGPR32(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseGPR64(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseAFGR64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseHWRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseFGR64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseFGR32Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseAFGR64Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseFGR64Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseACRegsDSP(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseFGR32Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseFGRH32Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseACC64DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseLO32DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHI32DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCOP2(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128BRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128HRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128WRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128DRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128CtrlRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
unsigned RegKind);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &,
StringRef Mnemonic);
int tryParseRegister(bool is64BitReg);
- bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
bool is64BitReg);
bool needsExpansion(MCInst &Inst);
@@ -139,17 +182,19 @@ class MipsAsmParser : public MCTargetAsmParser {
void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
void expandMemInst(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions,
- bool isLoad,bool isImmOpnd);
+ SmallVectorImpl<MCInst> &Instructions, bool isLoad,
+ bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
bool parseRelocOperand(const MCExpr *&Res);
- const MCExpr* evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
+ const MCExpr *evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
bool isEvaluated(const MCExpr *Expr);
bool parseDirectiveSet();
+ bool parseDirectiveMipsHackStocg();
+ bool parseDirectiveMipsHackELFFlags();
bool parseSetAtDirective();
bool parseSetNoAtDirective();
@@ -161,6 +206,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetAssignment();
bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveGpWord();
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
@@ -172,40 +218,49 @@ class MipsAsmParser : public MCTargetAsmParser {
return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
}
+ bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
+
int matchRegisterName(StringRef Symbol, bool is64BitReg);
int matchCPURegisterName(StringRef Symbol);
int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
- int matchFPURegisterName(StringRef Name, FpFormatTy Format);
+ int matchFPURegisterName(StringRef Name);
- void setFpFormat(FpFormatTy Format) {
- FpFormat = Format;
- }
+ int matchFCCRegisterName(StringRef Name);
- void setDefaultFpFormat();
+ int matchACRegisterName(StringRef Name);
- void setFpFormat(StringRef Format);
+ int matchMSA128RegisterName(StringRef Name);
- FpFormatTy getFpFormat() {return FpFormat;}
+ int matchMSA128CtrlRegisterName(StringRef Name);
+
+ int regKindToRegClass(int RegKind);
unsigned getReg(int RC, int RegNo);
int getATReg();
bool processInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
+ SmallVectorImpl<MCInst> &Instructions);
+
+ // Helper function that checks if the value of a vector index is within the
+ // boundaries of accepted values for each RegisterKind.
+ // Example: INSERT.B $w0[n], $1 => 16 > n >= 0
+ bool validateMSAIndex(int Val, int RegKind);
+
public:
- MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(sti), Parser(parser),
+ hasConsumedDollar(false) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
};
}
@@ -221,13 +276,21 @@ public:
Kind_GPR32,
Kind_GPR64,
Kind_HWRegs,
- Kind_HW64Regs,
Kind_FGR32Regs,
+ Kind_FGRH32Regs,
Kind_FGR64Regs,
Kind_AFGR64Regs,
Kind_CCRRegs,
Kind_FCCRegs,
- Kind_ACRegsDSP
+ Kind_ACC64DSP,
+ Kind_LO32DSP,
+ Kind_HI32DSP,
+ Kind_COP2,
+ Kind_MSA128BRegs,
+ Kind_MSA128HRegs,
+ Kind_MSA128WRegs,
+ Kind_MSA128DRegs,
+ Kind_MSA128CtrlRegs
};
private:
@@ -238,7 +301,9 @@ private:
k_Memory,
k_PostIndexRegister,
k_Register,
- k_Token
+ k_PtrReg,
+ k_Token,
+ k_LSAImm
} Kind;
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -277,7 +342,12 @@ public:
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
- void addExpr(MCInst &Inst, const MCExpr *Expr) const{
+ void addPtrRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getPtrReg()));
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediate when possible. Null MCExpr = 0.
if (Expr == 0)
Inst.addOperand(MCOperand::CreateImm(0));
@@ -306,6 +376,9 @@ public:
bool isImm() const { return Kind == k_Immediate; }
bool isToken() const { return Kind == k_Token; }
bool isMem() const { return Kind == k_Memory; }
+ bool isPtrReg() const { return Kind == k_PtrReg; }
+ bool isInvNum() const { return Kind == k_Immediate; }
+ bool isLSAImm() const { return Kind == k_LSAImm; }
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
@@ -317,13 +390,18 @@ public:
return Reg.RegNum;
}
+ unsigned getPtrReg() const {
+ assert((Kind == k_PtrReg) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
void setRegKind(RegisterKind RegKind) {
- assert((Kind == k_Register) && "Invalid access!");
+ assert((Kind == k_Register || Kind == k_PtrReg) && "Invalid access!");
Reg.Kind = RegKind;
}
const MCExpr *getImm() const {
- assert((Kind == k_Immediate) && "Invalid access!");
+ assert((Kind == k_Immediate || Kind == k_LSAImm) && "Invalid access!");
return Imm.Val;
}
@@ -354,6 +432,14 @@ public:
return Op;
}
+ static MipsOperand *CreatePtrReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_PtrReg);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
MipsOperand *Op = new MipsOperand(k_Immediate);
Op->Imm.Val = Val;
@@ -362,8 +448,16 @@ public:
return Op;
}
+ static MipsOperand *CreateLSAImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_LSAImm);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static MipsOperand *CreateMem(unsigned Base, const MCExpr *Off,
- SMLoc S, SMLoc E) {
+ SMLoc S, SMLoc E) {
MipsOperand *Op = new MipsOperand(k_Memory);
Op->Mem.Base = Base;
Op->Mem.Off = Off;
@@ -388,17 +482,12 @@ public:
return Reg.Kind == Kind_HWRegs;
}
- bool isHW64RegsAsm() const {
- assert((Kind == k_Register) && "Invalid access!");
- return Reg.Kind == Kind_HW64Regs;
- }
-
bool isCCRAsm() const {
assert((Kind == k_Register) && "Invalid access!");
return Reg.Kind == Kind_CCRRegs;
}
- bool isAFGR64Asm() const {
+ bool isAFGR64Asm() const {
return Kind == k_Register && Reg.Kind == Kind_AFGR64Regs;
}
@@ -410,28 +499,58 @@ public:
return (Kind == k_Register) && Reg.Kind == Kind_FGR32Regs;
}
+ bool isFGRH32Asm() const {
+ return (Kind == k_Register) && Reg.Kind == Kind_FGRH32Regs;
+ }
+
bool isFCCRegsAsm() const {
return (Kind == k_Register) && Reg.Kind == Kind_FCCRegs;
}
- bool isACRegsDSPAsm() const {
- return Kind == k_Register && Reg.Kind == Kind_ACRegsDSP;
+ bool isACC64DSPAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_ACC64DSP;
}
- /// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const {
- return StartLoc;
+ bool isLO32DSPAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_LO32DSP;
}
- /// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const {
- return EndLoc;
+
+ bool isHI32DSPAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_HI32DSP;
+ }
+
+ bool isCOP2Asm() const { return Kind == k_Register && Reg.Kind == Kind_COP2; }
+
+ bool isMSA128BAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128BRegs;
+ }
+
+ bool isMSA128HAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128HRegs;
+ }
+
+ bool isMSA128WAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128WRegs;
+ }
+
+ bool isMSA128DAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128DRegs;
}
+ bool isMSA128CRAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128CtrlRegs;
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
virtual void print(raw_ostream &OS) const {
llvm_unreachable("unimplemented!");
}
}; // class MipsOperand
-} // namespace
+} // namespace
namespace llvm {
extern const MCInstrDesc MipsInsts[];
@@ -462,8 +581,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// reference or immediate we may have to expand instructions.
for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
const MCOperandInfo &OpInfo = MCID.OpInfo[i];
- if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY)
- || (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
MCOperand &Op = Inst.getOperand(i);
if (Op.isImm()) {
int MemOffset = Op.getImm();
@@ -476,7 +595,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
const MCExpr *Expr = Op.getExpr();
if (Expr->getKind() == MCExpr::SymbolRef) {
const MCSymbolRefExpr *SR =
- static_cast<const MCSymbolRefExpr*>(Expr);
+ static_cast<const MCSymbolRefExpr *>(Expr);
if (SR->getKind() == MCSymbolRefExpr::VK_None) {
// Expand symbol.
expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
@@ -489,7 +608,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
} // for
- } // if load/store
+ } // if load/store
if (needsExpansion(Inst))
expandInstruction(Inst, IDLoc, Instructions);
@@ -512,7 +631,7 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
}
void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+ SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
case Mips::LoadImm32Reg:
return expandLoadImm(Inst, IDLoc, Instructions);
@@ -567,8 +686,9 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
}
}
-void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+void
+MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(2);
assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -609,8 +729,9 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
}
}
-void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+void
+MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(1);
assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -643,14 +764,15 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions, bool isLoad, bool isImmOpnd) {
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad, bool isImmOpnd) {
const MCSymbolRefExpr *SR;
MCInst TempInst;
unsigned ImmOffset, HiOffset, LoOffset;
const MCExpr *ExprOffset;
unsigned TmpRegNum;
- unsigned AtRegNum = getReg((isMips64()) ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID, getATReg());
+ unsigned AtRegNum = getReg(
+ (isMips64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg());
// 1st operand is either the source or destination register.
assert(Inst.getOperand(0).isReg() && "expected register operand kind");
unsigned RegOpNum = Inst.getOperand(0).getReg();
@@ -680,7 +802,7 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
TempInst.addOperand(MCOperand::CreateImm(HiOffset));
else {
if (ExprOffset->getKind() == MCExpr::SymbolRef) {
- SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+ SR = static_cast<const MCSymbolRefExpr *>(ExprOffset);
const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI,
getContext());
@@ -723,15 +845,14 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
TempInst.clear();
}
-bool MipsAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool MipsAsmParser::MatchAndEmitInstruction(
+ SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo, bool MatchingInlineAsm) {
MCInst Inst;
SmallVector<MCInst, 8> Instructions;
- unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
- MatchingInlineAsm);
+ unsigned MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
default:
@@ -752,7 +873,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((MipsOperand*) Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((MipsOperand *)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -766,44 +887,44 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
int MipsAsmParser::matchCPURegisterName(StringRef Name) {
- int CC;
+ int CC;
if (Name == "at")
return getATReg();
- CC = StringSwitch<unsigned>(Name)
- .Case("zero", 0)
- .Case("a0", 4)
- .Case("a1", 5)
- .Case("a2", 6)
- .Case("a3", 7)
- .Case("v0", 2)
- .Case("v1", 3)
- .Case("s0", 16)
- .Case("s1", 17)
- .Case("s2", 18)
- .Case("s3", 19)
- .Case("s4", 20)
- .Case("s5", 21)
- .Case("s6", 22)
- .Case("s7", 23)
- .Case("k0", 26)
- .Case("k1", 27)
- .Case("sp", 29)
- .Case("fp", 30)
- .Case("gp", 28)
- .Case("ra", 31)
- .Case("t0", 8)
- .Case("t1", 9)
- .Case("t2", 10)
- .Case("t3", 11)
- .Case("t4", 12)
- .Case("t5", 13)
- .Case("t6", 14)
- .Case("t7", 15)
- .Case("t8", 24)
- .Case("t9", 25)
- .Default(-1);
+ CC = StringSwitch<unsigned>(Name)
+ .Case("zero", 0)
+ .Case("a0", 4)
+ .Case("a1", 5)
+ .Case("a2", 6)
+ .Case("a3", 7)
+ .Case("v0", 2)
+ .Case("v1", 3)
+ .Case("s0", 16)
+ .Case("s1", 17)
+ .Case("s2", 18)
+ .Case("s3", 19)
+ .Case("s4", 20)
+ .Case("s5", 21)
+ .Case("s6", 22)
+ .Case("s7", 23)
+ .Case("k0", 26)
+ .Case("k1", 27)
+ .Case("sp", 29)
+ .Case("fp", 30)
+ .Case("gp", 28)
+ .Case("ra", 31)
+ .Case("t0", 8)
+ .Case("t1", 9)
+ .Case("t2", 10)
+ .Case("t3", 11)
+ .Case("t4", 12)
+ .Case("t5", 13)
+ .Case("t6", 14)
+ .Case("t7", 15)
+ .Case("t8", 24)
+ .Case("t9", 25)
+ .Default(-1);
// Although SGI documentation just cuts out t0-t3 for n32/n64,
// GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
@@ -813,72 +934,140 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
if (CC == -1 && isMips64())
CC = StringSwitch<unsigned>(Name)
- .Case("a4", 8)
- .Case("a5", 9)
- .Case("a6", 10)
- .Case("a7", 11)
- .Case("kt0", 26)
- .Case("kt1", 27)
- .Case("s8", 30)
- .Default(-1);
+ .Case("a4", 8)
+ .Case("a5", 9)
+ .Case("a6", 10)
+ .Case("a7", 11)
+ .Case("kt0", 26)
+ .Case("kt1", 27)
+ .Case("s8", 30)
+ .Default(-1);
return CC;
}
-int MipsAsmParser::matchFPURegisterName(StringRef Name, FpFormatTy Format) {
+int MipsAsmParser::matchFPURegisterName(StringRef Name) {
if (Name[0] == 'f') {
StringRef NumString = Name.substr(1);
unsigned IntVal;
if (NumString.getAsInteger(10, IntVal))
- return -1; // This is not an integer.
- if (IntVal > 31)
+ return -1; // This is not an integer.
+ if (IntVal > 31) // Maximum index for fpu register.
return -1;
+ return IntVal;
+ }
+ return -1;
+}
- if (Format == FP_FORMAT_S || Format == FP_FORMAT_W)
- return getReg(Mips::FGR32RegClassID, IntVal);
- if (Format == FP_FORMAT_D) {
- if (isFP64()) {
- return getReg(Mips::FGR64RegClassID, IntVal);
- }
- // Only even numbers available as register pairs.
- if ((IntVal > 31) || (IntVal % 2 != 0))
- return -1;
- return getReg(Mips::AFGR64RegClassID, IntVal / 2);
- }
+int MipsAsmParser::matchFCCRegisterName(StringRef Name) {
+
+ if (Name.startswith("fcc")) {
+ StringRef NumString = Name.substr(3);
+ unsigned IntVal;
+ if (NumString.getAsInteger(10, IntVal))
+ return -1; // This is not an integer.
+ if (IntVal > 7) // There are only 8 fcc registers.
+ return -1;
+ return IntVal;
}
return -1;
}
-int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
+int MipsAsmParser::matchACRegisterName(StringRef Name) {
+
+ if (Name.startswith("ac")) {
+ StringRef NumString = Name.substr(2);
+ unsigned IntVal;
+ if (NumString.getAsInteger(10, IntVal))
+ return -1; // This is not an integer.
+ if (IntVal > 3) // There are only 4 accumulator registers (ac0-ac3).
+ return -1;
+ return IntVal;
+ }
+ return -1;
+}
- if (Name.equals("fcc0"))
- return Mips::FCC0;
+int MipsAsmParser::matchMSA128RegisterName(StringRef Name) {
+ unsigned IntVal;
+
+ if (Name.front() != 'w' || Name.drop_front(1).getAsInteger(10, IntVal))
+ return -1;
+
+ if (IntVal > 31)
+ return -1;
+
+ return IntVal;
+}
+
+int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
+ int CC;
+
+ CC = StringSwitch<unsigned>(Name)
+ .Case("msair", 0)
+ .Case("msacsr", 1)
+ .Case("msaaccess", 2)
+ .Case("msasave", 3)
+ .Case("msamodify", 4)
+ .Case("msarequest", 5)
+ .Case("msamap", 6)
+ .Case("msaunmap", 7)
+ .Default(-1);
+
+ return CC;
+}
+
+int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
int CC;
CC = matchCPURegisterName(Name);
if (CC != -1)
return matchRegisterByNumber(CC, is64BitReg ? Mips::GPR64RegClassID
: Mips::GPR32RegClassID);
- return matchFPURegisterName(Name, getFpFormat());
-}
-
-void MipsAsmParser::setDefaultFpFormat() {
-
- if (isMips64() || isFP64())
- FpFormat = FP_FORMAT_D;
- else
- FpFormat = FP_FORMAT_S;
+ CC = matchFPURegisterName(Name);
+ // TODO: decide about fpu register class
+ if (CC != -1)
+ return matchRegisterByNumber(CC, isFP64() ? Mips::FGR64RegClassID
+ : Mips::FGR32RegClassID);
+ return matchMSA128RegisterName(Name);
}
-void MipsAsmParser::setFpFormat(StringRef Format) {
-
- FpFormat = StringSwitch<FpFormatTy>(Format.lower())
- .Case(".s", FP_FORMAT_S)
- .Case(".d", FP_FORMAT_D)
- .Case(".l", FP_FORMAT_L)
- .Case(".w", FP_FORMAT_W)
- .Default(FP_FORMAT_NONE);
+int MipsAsmParser::regKindToRegClass(int RegKind) {
+
+ switch (RegKind) {
+ case MipsOperand::Kind_GPR32:
+ return Mips::GPR32RegClassID;
+ case MipsOperand::Kind_GPR64:
+ return Mips::GPR64RegClassID;
+ case MipsOperand::Kind_HWRegs:
+ return Mips::HWRegsRegClassID;
+ case MipsOperand::Kind_FGR32Regs:
+ return Mips::FGR32RegClassID;
+ case MipsOperand::Kind_FGRH32Regs:
+ return Mips::FGRH32RegClassID;
+ case MipsOperand::Kind_FGR64Regs:
+ return Mips::FGR64RegClassID;
+ case MipsOperand::Kind_AFGR64Regs:
+ return Mips::AFGR64RegClassID;
+ case MipsOperand::Kind_CCRRegs:
+ return Mips::CCRRegClassID;
+ case MipsOperand::Kind_ACC64DSP:
+ return Mips::ACC64DSPRegClassID;
+ case MipsOperand::Kind_FCCRegs:
+ return Mips::FCCRegClassID;
+ case MipsOperand::Kind_MSA128BRegs:
+ return Mips::MSA128BRegClassID;
+ case MipsOperand::Kind_MSA128HRegs:
+ return Mips::MSA128HRegClassID;
+ case MipsOperand::Kind_MSA128WRegs:
+ return Mips::MSA128WRegClassID;
+ case MipsOperand::Kind_MSA128DRegs:
+ return Mips::MSA128DRegClassID;
+ case MipsOperand::Kind_MSA128CtrlRegs:
+ return Mips::MSACtrlRegClassID;
+ default:
+ return -1;
+ }
}
bool MipsAssemblerOptions::setATReg(unsigned Reg) {
@@ -889,17 +1078,15 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
return true;
}
-int MipsAsmParser::getATReg() {
- return Options.getATRegNum();
-}
+int MipsAsmParser::getATReg() { return Options.getATRegNum(); }
unsigned MipsAsmParser::getReg(int RC, int RegNo) {
return *(getContext().getRegisterInfo()->getRegClass(RC).begin() + RegNo);
}
int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
-
- if (RegNum > 31)
+ if (RegNum >
+ getContext().getRegisterInfo()->getRegClass(RegClass).getNumRegs())
return -1;
return getReg(RegClass, RegNum);
@@ -914,12 +1101,13 @@ int MipsAsmParser::tryParseRegister(bool is64BitReg) {
RegNum = matchRegisterName(lowerCase, is64BitReg);
} else if (Tok.is(AsmToken::Integer))
RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
- is64BitReg ? Mips::GPR64RegClassID : Mips::GPR32RegClassID);
+ is64BitReg ? Mips::GPR64RegClassID
+ : Mips::GPR32RegClassID);
return RegNum;
}
bool MipsAsmParser::tryParseRegisterOperand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands, bool is64BitReg) {
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands, bool is64BitReg) {
SMLoc S = Parser.getTok().getLoc();
int RegNo = -1;
@@ -928,14 +1116,15 @@ bool MipsAsmParser::tryParseRegisterOperand(
if (RegNo == -1)
return true;
- Operands.push_back(MipsOperand::CreateReg(RegNo, S,
- Parser.getTok().getLoc()));
+ Operands.push_back(
+ MipsOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
Parser.Lex(); // Eat register token.
return false;
}
-bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
- StringRef Mnemonic) {
+bool
+MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -983,22 +1172,39 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
-
MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
-
// Otherwise create a symbol reference.
- const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
- getContext());
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
Operands.push_back(MipsOperand::CreateImm(Res, S, E));
return false;
}
case AsmToken::Identifier:
+ // For instruction aliases like "bc1f $Label", the dedicated parser will
+ // eat the '$' sign before failing. So in order to look for the appropriate
+ // label, we must first check whether we have already consumed the '$'.
+ if (hasConsumedDollar) {
+ hasConsumedDollar = false;
+ SMLoc S = Parser.getTok().getLoc();
+ StringRef Identifier;
+ if (Parser.parseIdentifier(Identifier))
+ return true;
+ SMLoc E =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
+ // Create a symbol reference.
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+
+ Operands.push_back(MipsOperand::CreateImm(Res, S, E));
+ return false;
+ }
// Look for the existing symbol; we should check if
// we need to assign the proper RegisterKind.
if (searchSymbolAlias(Operands, MipsOperand::Kind_None))
return false;
- // Else drop to expression parsing.
+ // Else drop to expression parsing.
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -1029,7 +1235,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
}
-const MCExpr* MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
+const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
StringRef RelocStr) {
const MCExpr *Res;
// Check the type of the expression.
@@ -1099,7 +1305,7 @@ bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
}
bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
- Parser.Lex(); // Eat the % token.
+ Parser.Lex(); // Eat the % token.
const AsmToken &Tok = Parser.getTok(); // Get next token, operation.
if (Tok.isNot(AsmToken::Identifier))
return true;
@@ -1145,7 +1351,7 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister(isMips64());
EndLoc = Parser.getTok().getLoc();
- return (RegNo == (unsigned) -1);
+ return (RegNo == (unsigned)-1);
}
bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
@@ -1177,11 +1383,12 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
}
MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
const MCExpr *IdVal = 0;
SMLoc S;
bool isParenExpr = false;
+ MipsAsmParser::OperandMatchResultTy Res = MatchOperand_NoMatch;
// First operand is the offset.
S = Parser.getTok().getLoc();
@@ -1196,21 +1403,20 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::LParen)) {
- MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
+ MipsOperand *Mnemonic = static_cast<MipsOperand *>(Operands[0]);
if (Mnemonic->getToken() == "la") {
- SMLoc E = SMLoc::getFromPointer(
- Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
return MatchOperand_Success;
}
if (Tok.is(AsmToken::EndOfStatement)) {
- SMLoc E = SMLoc::getFromPointer(
- Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
// Zero register assumed, add a memory operand with ZERO as its base.
- Operands.push_back(MipsOperand::CreateMem(isMips64() ? Mips::ZERO_64
- : Mips::ZERO,
- IdVal, S, E));
+ Operands.push_back(MipsOperand::CreateMem(
+ isMips64() ? Mips::ZERO_64 : Mips::ZERO, IdVal, S, E));
return MatchOperand_Success;
}
Error(Parser.getTok().getLoc(), "'(' expected");
@@ -1220,21 +1426,12 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
Parser.Lex(); // Eat the '(' token.
}
- const AsmToken &Tok1 = Parser.getTok(); // Get next token
- if (Tok1.is(AsmToken::Dollar)) {
- Parser.Lex(); // Eat the '$' token.
- if (tryParseRegisterOperand(Operands, isMips64())) {
- Error(Parser.getTok().getLoc(), "unexpected token in operand");
- return MatchOperand_ParseFail;
- }
-
- } else {
- Error(Parser.getTok().getLoc(), "unexpected token in operand");
- return MatchOperand_ParseFail;
- }
+ Res = parseRegs(Operands, isMips64() ? (int)MipsOperand::Kind_GPR64
+ : (int)MipsOperand::Kind_GPR32);
+ if (Res != MatchOperand_Success)
+ return Res;
- const AsmToken &Tok2 = Parser.getTok(); // Get next token.
- if (Tok2.isNot(AsmToken::RParen)) {
+ if (Parser.getTok().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "')' expected");
return MatchOperand_ParseFail;
}
@@ -1247,7 +1444,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
IdVal = MCConstantExpr::Create(0, getContext());
// Replace the register operand with the memory operand.
- MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ MipsOperand *op = static_cast<MipsOperand *>(Operands.back());
int RegNo = op->getReg();
// Remove the register from the operands.
Operands.pop_back();
@@ -1266,31 +1463,150 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
return MatchOperand_Success;
}
+bool MipsAsmParser::parsePtrReg(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind) {
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return false;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex();
+ AsmToken::TokenKind TkKind = getLexer().getKind();
+ int Reg;
+
+ if (TkKind == AsmToken::Integer) {
+ Reg = matchRegisterByNumber(Parser.getTok().getIntVal(),
+ regKindToRegClass(RegKind));
+ if (Reg == -1)
+ return false;
+ } else if (TkKind == AsmToken::Identifier) {
+ if ((Reg = matchCPURegisterName(Parser.getTok().getString().lower())) == -1)
+ return false;
+ Reg = getReg(regKindToRegClass(RegKind), Reg);
+ } else {
+ return false;
+ }
+
+ MipsOperand *Op = MipsOperand::CreatePtrReg(Reg, S, Parser.getTok().getLoc());
+ Op->setRegKind((MipsOperand::RegisterKind)RegKind);
+ Operands.push_back(Op);
+ Parser.Lex();
+ return true;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parsePtrReg(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ MipsOperand::RegisterKind RegKind =
+ isN64() ? MipsOperand::Kind_GPR64 : MipsOperand::Kind_GPR32;
+
+ // Parse index register.
+ if (!parsePtrReg(Operands, RegKind))
+ return MatchOperand_NoMatch;
+
+ // Parse '('.
+ if (Parser.getTok().isNot(AsmToken::LParen))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc()));
+ Parser.Lex();
+
+ // Parse base register.
+ if (!parsePtrReg(Operands, RegKind))
+ return MatchOperand_NoMatch;
+
+ // Parse ')'.
+ if (Parser.getTok().isNot(AsmToken::RParen))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(MipsOperand::CreateToken(")", getLexer().getLoc()));
+ Parser.Lex();
+
+ return MatchOperand_Success;
+}
+
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+MipsAsmParser::parseRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
int RegKind) {
MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
- if (getLexer().getKind() == AsmToken::Identifier) {
+ if (getLexer().getKind() == AsmToken::Identifier && !hasConsumedDollar) {
if (searchSymbolAlias(Operands, Kind))
return MatchOperand_Success;
return MatchOperand_NoMatch;
}
+ SMLoc S = Parser.getTok().getLoc();
// If the first token is not '$', we have an error.
- if (Parser.getTok().isNot(AsmToken::Dollar))
+ if (Parser.getTok().isNot(AsmToken::Dollar) && !hasConsumedDollar)
return MatchOperand_NoMatch;
-
- Parser.Lex(); // Eat $
- if (!tryParseRegisterOperand(Operands,
- RegKind == MipsOperand::Kind_GPR64)) {
- // Set the proper register kind.
- MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
- op->setRegKind(Kind);
- if ((Kind == MipsOperand::Kind_GPR32)
- && (getLexer().is(AsmToken::LParen))) {
+ if (!hasConsumedDollar) {
+ Parser.Lex(); // Eat the '$'
+ hasConsumedDollar = true;
+ }
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ int RegNum = -1;
+ std::string RegName = Parser.getTok().getString().lower();
+ // Match register by name
+ switch (RegKind) {
+ case MipsOperand::Kind_GPR32:
+ case MipsOperand::Kind_GPR64:
+ RegNum = matchCPURegisterName(RegName);
+ break;
+ case MipsOperand::Kind_AFGR64Regs:
+ case MipsOperand::Kind_FGR64Regs:
+ case MipsOperand::Kind_FGR32Regs:
+ case MipsOperand::Kind_FGRH32Regs:
+ RegNum = matchFPURegisterName(RegName);
+ if (RegKind == MipsOperand::Kind_AFGR64Regs)
+ RegNum /= 2;
+ else if (RegKind == MipsOperand::Kind_FGRH32Regs && !isFP64())
+ if (RegNum != -1 && RegNum % 2 != 0)
+ Warning(S, "Float register should be even.");
+ break;
+ case MipsOperand::Kind_FCCRegs:
+ RegNum = matchFCCRegisterName(RegName);
+ break;
+ case MipsOperand::Kind_ACC64DSP:
+ RegNum = matchACRegisterName(RegName);
+ break;
+ default:
+ break; // No match, value is set to -1.
+ }
+ // No match found; return _NoMatch so another round of matching can try.
+ if (RegNum < 0)
+ return MatchOperand_NoMatch;
+
+ int RegVal = getReg(regKindToRegClass(Kind), RegNum);
+ if (RegVal == -1)
+ return MatchOperand_NoMatch;
+
+ MipsOperand *Op =
+ MipsOperand::CreateReg(RegVal, S, Parser.getTok().getLoc());
+ Op->setRegKind(Kind);
+ Operands.push_back(Op);
+ hasConsumedDollar = false;
+ Parser.Lex(); // Eat the register name.
+ return MatchOperand_Success;
+ } else if (getLexer().getKind() == AsmToken::Integer) {
+ unsigned RegNum = Parser.getTok().getIntVal();
+ if (Kind == MipsOperand::Kind_HWRegs) {
+ if (RegNum != 29)
+ return MatchOperand_NoMatch;
+ // Only hwreg 29 is supported, found at index 0.
+ RegNum = 0;
+ }
+ int Reg = matchRegisterByNumber(RegNum, regKindToRegClass(Kind));
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+ MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
+ Op->setRegKind(Kind);
+ Operands.push_back(Op);
+ hasConsumedDollar = false;
+ Parser.Lex(); // Eat the register number.
+ if ((RegKind == MipsOperand::Kind_GPR32) &&
+ (getLexer().is(AsmToken::LParen))) {
// Check if it is indexed addressing operand.
Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc()));
Parser.Lex(); // Eat the parenthesis.
- if (parseRegs(Operands,RegKind) != MatchOperand_Success)
+ if (parseRegs(Operands, RegKind) != MatchOperand_Success)
return MatchOperand_NoMatch;
if (getLexer().isNot(AsmToken::RParen))
return MatchOperand_NoMatch;
@@ -1302,49 +1618,254 @@ MipsAsmParser::parseRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return MatchOperand_NoMatch;
}
+bool MipsAsmParser::validateMSAIndex(int Val, int RegKind) {
+ MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
+
+ if (Val < 0)
+ return false;
+
+ switch (Kind) {
+ default:
+ return false;
+ case MipsOperand::Kind_MSA128BRegs:
+ return Val < 16;
+ case MipsOperand::Kind_MSA128HRegs:
+ return Val < 8;
+ case MipsOperand::Kind_MSA128WRegs:
+ return Val < 4;
+ case MipsOperand::Kind_MSA128DRegs:
+ return Val < 2;
+ }
+}
+
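// A hedged illustration (hypothetical helper, not part of this change):
// validateMSAIndex above encodes the fact that a 128-bit MSA register holds
// 128/EltBits elements, so an index is valid iff 0 <= Val < 128/EltBits
// (16 lanes for .b, 8 for .h, 4 for .w, 2 for .d).
constexpr bool isValidMSALane(int Val, unsigned EltBits) {
  // EltBits is assumed to be 8, 16, 32 or 64.
  return Val >= 0 && static_cast<unsigned>(Val) < 128 / EltBits;
}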
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseGPR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseMSARegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind) {
+ MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
+ SMLoc S = Parser.getTok().getLoc();
+ std::string RegName;
- if (!isMips64())
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ switch (RegKind) {
+ default:
+ return MatchOperand_ParseFail;
+ case MipsOperand::Kind_MSA128BRegs:
+ case MipsOperand::Kind_MSA128HRegs:
+ case MipsOperand::Kind_MSA128WRegs:
+ case MipsOperand::Kind_MSA128DRegs:
+ break;
+ }
+
+ Parser.Lex(); // Eat the '$'.
+ if (getLexer().getKind() == AsmToken::Identifier)
+ RegName = Parser.getTok().getString().lower();
+ else
+ return MatchOperand_ParseFail;
+
+ int RegNum = matchMSA128RegisterName(RegName);
+
+ if (RegNum < 0 || RegNum > 31)
+ return MatchOperand_ParseFail;
+
+ int RegVal = getReg(regKindToRegClass(Kind), RegNum);
+ if (RegVal == -1)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *Op = MipsOperand::CreateReg(RegVal, S, Parser.getTok().getLoc());
+ Op->setRegKind(Kind);
+ Operands.push_back(Op);
+
+ Parser.Lex(); // Eat the register identifier.
+
+ // MSA registers may be suffixed with an index in the form of:
+ // 1) Immediate expression.
+ // 2) General Purpose Register.
+ // Examples:
+ // 1) copy_s.b $29,$w0[0]
+ // 2) sld.b $w0,$w1[$1]
+
+ if (Parser.getTok().isNot(AsmToken::LBrac))
+ return MatchOperand_Success;
+
+ MipsOperand *Mnemonic = static_cast<MipsOperand *>(Operands[0]);
+
+ Operands.push_back(MipsOperand::CreateToken("[", Parser.getTok().getLoc()));
+ Parser.Lex(); // Parse the '[' token.
+
+ if (Parser.getTok().is(AsmToken::Dollar)) {
+ // This must be a GPR.
+ MipsOperand *RegOp;
+ SMLoc VIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Parse the '$' token.
+
+ // GPRs have aliases and we must account for that. Example: $30 == $fp
+ if (getLexer().getKind() == AsmToken::Integer) {
+ unsigned RegNum = Parser.getTok().getIntVal();
+ int Reg = matchRegisterByNumber(
+ RegNum, regKindToRegClass(MipsOperand::Kind_GPR32));
+ if (Reg == -1) {
+ Error(VIdx, "invalid general purpose register");
+ return MatchOperand_ParseFail;
+ }
+
+ RegOp = MipsOperand::CreateReg(Reg, VIdx, Parser.getTok().getLoc());
+ } else if (getLexer().getKind() == AsmToken::Identifier) {
+ int RegNum = -1;
+ std::string RegName = Parser.getTok().getString().lower();
+
+ RegNum = matchCPURegisterName(RegName);
+ if (RegNum == -1) {
+ Error(VIdx, "general purpose register expected");
+ return MatchOperand_ParseFail;
+ }
+ RegNum = getReg(regKindToRegClass(MipsOperand::Kind_GPR32), RegNum);
+ RegOp = MipsOperand::CreateReg(RegNum, VIdx, Parser.getTok().getLoc());
+ } else
+ return MatchOperand_ParseFail;
+
+ RegOp->setRegKind(MipsOperand::Kind_GPR32);
+ Operands.push_back(RegOp);
+ Parser.Lex(); // Eat the register identifier.
+
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(MipsOperand::CreateToken("]", Parser.getTok().getLoc()));
+ Parser.Lex(); // Parse the ']' token.
+
+ return MatchOperand_Success;
+ }
+
+ // The index must be a constant expression then.
+ SMLoc VIdx = Parser.getTok().getLoc();
+ const MCExpr *ImmVal;
+
+ if (getParser().parseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+
+ const MCConstantExpr *expr = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!expr || !validateMSAIndex((int)expr->getValue(), Kind)) {
+ Error(VIdx, "invalid immediate value");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getEndLoc();
+
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return MatchOperand_ParseFail;
+
+ bool insve =
+ Mnemonic->getToken() == "insve.b" || Mnemonic->getToken() == "insve.h" ||
+ Mnemonic->getToken() == "insve.w" || Mnemonic->getToken() == "insve.d";
+
+ // The second vector index of insve instructions is always 0.
+ if (insve && Operands.size() > 6) {
+ if (expr->getValue() != 0) {
+ Error(VIdx, "immediate value must be 0");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(MipsOperand::CreateToken("0", VIdx));
+ } else
+ Operands.push_back(MipsOperand::CreateImm(expr, VIdx, E));
+
+ Operands.push_back(MipsOperand::CreateToken("]", Parser.getTok().getLoc()));
+
+ Parser.Lex(); // Parse the ']' token.
+
+ return MatchOperand_Success;
+}
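The insve special case above compares the mnemonic token four times; an equivalent standalone check, shown only to illustrate the intent (helper name is assumed):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// True for insve.{b,h,w,d}, whose second vector index must always be 0.
static bool isInsveMnemonic(llvm::StringRef Mnemonic) {
  return llvm::StringSwitch<bool>(Mnemonic)
      .Cases("insve.b", "insve.h", "insve.w", "insve.d", true)
      .Default(false);
}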
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseMSACtrlRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind) {
+ MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
+
+ if (Kind != MipsOperand::Kind_MSA128CtrlRegs)
return MatchOperand_NoMatch;
- return parseRegs(Operands, (int) MipsOperand::Kind_GPR64);
+
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_ParseFail;
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ Parser.Lex(); // Eat the '$' symbol.
+
+ int RegNum = -1;
+ if (getLexer().getKind() == AsmToken::Identifier)
+ RegNum = matchMSA128CtrlRegisterName(Parser.getTok().getString().lower());
+ else if (getLexer().getKind() == AsmToken::Integer)
+ RegNum = Parser.getTok().getIntVal();
+ else
+ return MatchOperand_ParseFail;
+
+ if (RegNum < 0 || RegNum > 7)
+ return MatchOperand_ParseFail;
+
+ int RegVal = getReg(regKindToRegClass(Kind), RegNum);
+ if (RegVal == -1)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *RegOp =
+ MipsOperand::CreateReg(RegVal, S, Parser.getTok().getLoc());
+ RegOp->setRegKind(MipsOperand::Kind_MSA128CtrlRegs);
+ Operands.push_back(RegOp);
+ Parser.Lex(); // Eat the register identifier.
+
+ return MatchOperand_Success;
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseGPR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegs(Operands, (int) MipsOperand::Kind_GPR32);
+MipsAsmParser::parseGPR64(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ return parseRegs(Operands, (int)MipsOperand::Kind_GPR64);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseAFGR64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseGPR32(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_GPR32);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseAFGR64Regs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
if (isFP64())
return MatchOperand_NoMatch;
- // Double operand is expected, set appropriate format
- setFpFormat(FP_FORMAT_D);
-
- return parseRegs(Operands, (int) MipsOperand::Kind_AFGR64Regs);
+ return parseRegs(Operands, (int)MipsOperand::Kind_AFGR64Regs);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseFGR64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseFGR64Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
if (!isFP64())
return MatchOperand_NoMatch;
- // Double operand is expected, set appropriate format
- setFpFormat(FP_FORMAT_D);
+ return parseRegs(Operands, (int)MipsOperand::Kind_FGR64Regs);
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseFGR32Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_FGR32Regs);
+}
- return parseRegs(Operands, (int) MipsOperand::Kind_FGR64Regs);
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseFGRH32Regs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_FGRH32Regs);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseFGR32Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Single operand is expected, set appropriate format
- setFpFormat(FP_FORMAT_S);
- return parseRegs(Operands, (int) MipsOperand::Kind_FGR32Regs);
+MipsAsmParser::parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_FCCRegs);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseACC64DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_ACC64DSP);
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseLO32DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
// If the first token is not '$' we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1357,19 +1878,19 @@ MipsAsmParser::parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- if (!Tok.getIdentifier().startswith("fcc"))
+ if (!Tok.getIdentifier().startswith("ac"))
return MatchOperand_NoMatch;
- StringRef NumString = Tok.getIdentifier().substr(3);
+ StringRef NumString = Tok.getIdentifier().substr(2);
unsigned IntVal;
if (NumString.getAsInteger(10, IntVal))
return MatchOperand_NoMatch;
- unsigned Reg = matchRegisterByNumber(IntVal, Mips::FCCRegClassID);
+ unsigned Reg = matchRegisterByNumber(IntVal, Mips::LO32DSPRegClassID);
MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
- Op->setRegKind(MipsOperand::Kind_FCCRegs);
+ Op->setRegKind(MipsOperand::Kind_LO32DSP);
Operands.push_back(Op);
Parser.Lex(); // Eat the register number.
@@ -1377,7 +1898,7 @@ MipsAsmParser::parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseACRegsDSP(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseHI32DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
// If the first token is not '$' we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1390,27 +1911,78 @@ MipsAsmParser::parseACRegsDSP(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- if (!Tok.getIdentifier().startswith("acc"))
+ if (!Tok.getIdentifier().startswith("ac"))
return MatchOperand_NoMatch;
- StringRef NumString = Tok.getIdentifier().substr(3);
+ StringRef NumString = Tok.getIdentifier().substr(2);
unsigned IntVal;
if (NumString.getAsInteger(10, IntVal))
return MatchOperand_NoMatch;
- unsigned Reg = matchRegisterByNumber(IntVal, Mips::ACRegsDSPRegClassID);
+ unsigned Reg = matchRegisterByNumber(IntVal, Mips::HI32DSPRegClassID);
MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
- Op->setRegKind(MipsOperand::Kind_ACRegsDSP);
+ Op->setRegKind(MipsOperand::Kind_HI32DSP);
Operands.push_back(Op);
Parser.Lex(); // Eat the register number.
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCOP2(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '$'
+
+ const AsmToken &Tok = Parser.getTok(); // Get next token.
+
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned IntVal = Tok.getIntVal();
+
+ unsigned Reg = matchRegisterByNumber(IntVal, Mips::COP2RegClassID);
+
+ MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
+ Op->setRegKind(MipsOperand::Kind_COP2);
+ Operands.push_back(Op);
+
+ Parser.Lex(); // Eat the register number.
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128BRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSARegs(Operands, (int)MipsOperand::Kind_MSA128BRegs);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128HRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSARegs(Operands, (int)MipsOperand::Kind_MSA128HRegs);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128WRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSARegs(Operands, (int)MipsOperand::Kind_MSA128WRegs);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128DRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSARegs(Operands, (int)MipsOperand::Kind_MSA128DRegs);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128CtrlRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSACtrlRegs(Operands, (int)MipsOperand::Kind_MSA128CtrlRegs);
+}
+
bool MipsAsmParser::searchSymbolAlias(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned RegKind) {
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands, unsigned RegKind) {
MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
if (Sym) {
@@ -1421,38 +1993,39 @@ bool MipsAsmParser::searchSymbolAlias(
else
return false;
if (Expr->getKind() == MCExpr::SymbolRef) {
- MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind) RegKind;
- const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
const StringRef DefSymbol = Ref->getSymbol().getName();
if (DefSymbol.startswith("$")) {
int RegNum = -1;
APInt IntVal(32, -1);
if (!DefSymbol.substr(1).getAsInteger(10, IntVal))
RegNum = matchRegisterByNumber(IntVal.getZExtValue(),
- isMips64()
- ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID);
+ isMips64() ? Mips::GPR64RegClassID
+ : Mips::GPR32RegClassID);
else {
// Lookup for the register with the corresponding name.
switch (Kind) {
case MipsOperand::Kind_AFGR64Regs:
case MipsOperand::Kind_FGR64Regs:
- RegNum = matchFPURegisterName(DefSymbol.substr(1), FP_FORMAT_D);
+ RegNum = matchFPURegisterName(DefSymbol.substr(1));
break;
case MipsOperand::Kind_FGR32Regs:
- RegNum = matchFPURegisterName(DefSymbol.substr(1), FP_FORMAT_S);
+ RegNum = matchFPURegisterName(DefSymbol.substr(1));
break;
case MipsOperand::Kind_GPR64:
case MipsOperand::Kind_GPR32:
default:
- RegNum = matchRegisterName(DefSymbol.substr(1), isMips64());
+ RegNum = matchCPURegisterName(DefSymbol.substr(1));
break;
}
+ if (RegNum > -1)
+ RegNum = getReg(regKindToRegClass(Kind), RegNum);
}
if (RegNum > -1) {
Parser.Lex();
- MipsOperand *op = MipsOperand::CreateReg(RegNum, S,
- Parser.getTok().getLoc());
+ MipsOperand *op =
+ MipsOperand::CreateReg(RegNum, S, Parser.getTok().getLoc());
op->setRegKind(Kind);
Operands.push_back(op);
return true;
@@ -1460,9 +2033,9 @@ bool MipsAsmParser::searchSymbolAlias(
}
} else if (Expr->getKind() == MCExpr::Constant) {
Parser.Lex();
- const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
- MipsOperand *op = MipsOperand::CreateImm(Const, S,
- Parser.getTok().getLoc());
+ const MCConstantExpr *Const = static_cast<const MCConstantExpr *>(Expr);
+ MipsOperand *op =
+ MipsOperand::CreateImm(Const, S, Parser.getTok().getLoc());
Operands.push_back(op);
return true;
}
@@ -1471,115 +2044,101 @@ bool MipsAsmParser::searchSymbolAlias(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-
- // If the first token is not '$' we have error.
- if (Parser.getTok().isNot(AsmToken::Dollar))
- return MatchOperand_NoMatch;
- SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the '$'.
-
- const AsmToken &Tok = Parser.getTok(); // Get the next token.
- if (Tok.isNot(AsmToken::Integer))
- return MatchOperand_NoMatch;
-
- unsigned RegNum = Tok.getIntVal();
- // At the moment only hwreg29 is supported.
- if (RegNum != 29)
- return MatchOperand_ParseFail;
-
- MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
- Parser.getTok().getLoc());
- op->setRegKind(MipsOperand::Kind_HWRegs);
- Operands.push_back(op);
-
- Parser.Lex(); // Eat the register number.
- return MatchOperand_Success;
+MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_HWRegs);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseHW64Regs(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_CCRRegs);
+}
- if (!isMips64())
- return MatchOperand_NoMatch;
- // If the first token is not '$' we have an error.
- if (Parser.getTok().isNot(AsmToken::Dollar))
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ const MCExpr *IdVal;
+  // If the first token is '$' we may have a register operand.
+ if (Parser.getTok().is(AsmToken::Dollar))
return MatchOperand_NoMatch;
SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat $
-
- const AsmToken &Tok = Parser.getTok(); // Get the next token.
- if (Tok.isNot(AsmToken::Integer))
- return MatchOperand_NoMatch;
-
- unsigned RegNum = Tok.getIntVal();
- // At the moment only hwreg29 is supported.
- if (RegNum != 29)
+ if (getParser().parseExpression(IdVal))
return MatchOperand_ParseFail;
-
- MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
- Parser.getTok().getLoc());
- op->setRegKind(MipsOperand::Kind_HW64Regs);
- Operands.push_back(op);
-
- Parser.Lex(); // Eat the register number.
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal);
+ assert(MCE && "Unexpected MCExpr type.");
+ int64_t Val = MCE->getValue();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(MipsOperand::CreateImm(
+ MCConstantExpr::Create(0 - Val, getContext()), S, E));
return MatchOperand_Success;
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // If the first token is not '$' we have an error.
- if (Parser.getTok().isNot(AsmToken::Dollar))
+MipsAsmParser::parseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ switch (getLexer().getKind()) {
+ default:
return MatchOperand_NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ break;
+ }
+ const MCExpr *Expr;
SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the '$'
-
- const AsmToken &Tok = Parser.getTok(); // Get next token.
- if (Tok.isNot(AsmToken::Integer))
- return MatchOperand_NoMatch;
+ if (getParser().parseExpression(Expr))
+ return MatchOperand_ParseFail;
- unsigned Reg = matchRegisterByNumber(Tok.getIntVal(), Mips::CCRRegClassID);
+ int64_t Val;
+ if (!Expr->EvaluateAsAbsolute(Val)) {
+ Error(S, "expected immediate value");
+ return MatchOperand_ParseFail;
+ }
- MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
- Op->setRegKind(MipsOperand::Kind_CCRRegs);
- Operands.push_back(Op);
+  // The LSA instruction encodes a 2-bit unsigned immediate, and the CPU
+  // always adds one to that field, so the range allowed in assembly is
+  // 1..4. We only check the range here and deal with the
+  // addition/subtraction when actually decoding/encoding the instruction.
+ if (Val < 1 || Val > 4) {
+ Error(S, "immediate not in range (1..4)");
+ return MatchOperand_ParseFail;
+ }
- Parser.Lex(); // Eat the register number.
+ Operands.push_back(MipsOperand::CreateLSAImm(Expr, S,
+ Parser.getTok().getLoc()));
return MatchOperand_Success;
}
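The off-by-one convention described above is the inverse of what DecodeLSAImm does on the disassembler side; a hypothetical round-trip sketch, not part of the patch:

#include <cassert>
#include <cstdint>

// LSA accepts shift amounts 1..4 in assembly but stores imm - 1 in its
// 2-bit field; decoding adds the one back again.
static uint32_t encodeLsaImm(int64_t Val) {
  assert(Val >= 1 && Val <= 4 && "immediate not in range (1..4)");
  return static_cast<uint32_t>(Val - 1); // field holds 0..3
}

static int64_t decodeLsaImm(uint32_t Field) {
  return static_cast<int64_t>(Field & 0x3) + 1;
}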
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
- MCSymbolRefExpr::VariantKind VK
- = StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol)
- .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI)
- .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO)
- .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL)
- .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL)
- .Case("got", MCSymbolRefExpr::VK_Mips_GOT)
- .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD)
- .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM)
- .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI)
- .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO)
- .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL)
- .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI)
- .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO)
- .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP)
- .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE)
- .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST)
- .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI)
- .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO)
- .Default(MCSymbolRefExpr::VK_None);
+ MCSymbolRefExpr::VariantKind VK =
+ StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol)
+ .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI)
+ .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO)
+ .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL)
+ .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL)
+ .Case("got", MCSymbolRefExpr::VK_Mips_GOT)
+ .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD)
+ .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM)
+ .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI)
+ .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO)
+ .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL)
+ .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI)
+ .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO)
+ .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP)
+ .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE)
+ .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST)
+ .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI)
+ .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO)
+ .Default(MCSymbolRefExpr::VK_None);
return VK;
}
-bool MipsAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool MipsAsmParser::ParseInstruction(
+ ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
  // Check if we have a valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
Parser.eatToEndOfStatement();
@@ -1757,12 +2316,13 @@ bool MipsAsmParser::parseSetAssignment() {
// We have a '$' followed by something, make sure they are adjacent.
if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
return true;
- StringRef Res = StringRef(DollarLoc.getPointer(),
- getTok().getEndLoc().getPointer() - DollarLoc.getPointer());
+ StringRef Res =
+ StringRef(DollarLoc.getPointer(),
+ getTok().getEndLoc().getPointer() - DollarLoc.getPointer());
Symbol = getContext().GetOrCreateSymbol(Res);
Parser.Lex();
- Value = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
- getContext());
+ Value =
+ MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, getContext());
} else if (Parser.parseExpression(Value))
return reportParseError("expected valid expression after comma");
@@ -1810,6 +2370,34 @@ bool MipsAsmParser::parseDirectiveSet() {
return true;
}
+bool MipsAsmParser::parseDirectiveMipsHackStocg() {
+ MCAsmParser &Parser = getParser();
+ StringRef Name;
+ if (Parser.parseIdentifier(Name))
+ reportParseError("expected identifier");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token");
+ Lex();
+
+ int64_t Flags = 0;
+ if (Parser.parseAbsoluteExpression(Flags))
+ return TokError("unexpected token");
+
+ getTargetStreamer().emitMipsHackSTOCG(Sym, Flags);
+ return false;
+}
+
+bool MipsAsmParser::parseDirectiveMipsHackELFFlags() {
+ int64_t Flags = 0;
+ if (Parser.parseAbsoluteExpression(Flags))
+ return TokError("unexpected token");
+
+ getTargetStreamer().emitMipsHackELFFlags(Flags);
+ return false;
+}
+
/// parseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
@@ -1835,6 +2423,22 @@ bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// parseDirectiveGpWord
+/// ::= .gpword local_sym
+bool MipsAsmParser::parseDirectiveGpWord() {
+ const MCExpr *Value;
+  // EmitGPRel32Value requires an expression, so we use the base class
+  // method to evaluate the expression.
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitGPRel32Value(Value);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(), "unexpected token in directive");
+ Parser.Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@@ -1875,7 +2479,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".gpword") {
// Ignore this directive for now.
- Parser.eatToEndOfStatement();
+ parseDirectiveGpWord();
return false;
}
@@ -1884,6 +2488,12 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
}
+ if (IDVal == ".mips_hack_stocg")
+ return parseDirectiveMipsHackStocg();
+
+ if (IDVal == ".mips_hack_elf_flags")
+ return parseDirectiveMipsHackELFFlags();
+
return true;
}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index aedb78b..6acc9a8 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -35,7 +35,6 @@ add_llvm_target(MipsCodeGen
MipsMachineFunction.cpp
MipsModuleISelDAGToDAG.cpp
MipsOs16.cpp
- MipsOptimizeMathLibCalls.cpp
MipsRegisterInfo.cpp
MipsSEFrameLowering.cpp
MipsSEInstrInfo.cpp
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index d99df4d..60508a8 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -35,26 +35,33 @@ public:
///
MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
bool bigEndian) :
- MCDisassembler(STI), RegInfo(Info), isBigEndian(bigEndian) {}
+ MCDisassembler(STI), RegInfo(Info),
+ IsN64(STI.getFeatureBits() & Mips::FeatureN64), isBigEndian(bigEndian) {}
virtual ~MipsDisassemblerBase() {}
const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); }
+ bool isN64() const { return IsN64; }
+
private:
OwningPtr<const MCRegisterInfo> RegInfo;
+ bool IsN64;
protected:
bool isBigEndian;
};
/// MipsDisassembler - a disassembler class for Mips32.
class MipsDisassembler : public MipsDisassemblerBase {
+ bool IsMicroMips;
public:
/// Constructor - Initializes the disassembler.
///
MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
bool bigEndian) :
- MipsDisassemblerBase(STI, Info, bigEndian) {}
+ MipsDisassemblerBase(STI, Info, bigEndian) {
+ IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
+ }
/// getInstruction - See MCDisassembler.
virtual DecodeStatus getInstruction(MCInst &instr,
@@ -103,10 +110,15 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodePtrRegisterClass(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
unsigned RegNo,
@@ -118,6 +130,11 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFGRH32RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -138,20 +155,45 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
-static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
-static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
@@ -163,11 +205,38 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// DecodeBranchTargetMM - Decode microMIPS branch offset, which is
+// shifted left by 1 bit.
+static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
+// DecodeJumpTargetMM - Decode microMIPS jump target, which is
+// shifted left by 1 bit.
+static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMem(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMemMMImm16(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
@@ -177,6 +246,13 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// Decode the immediate field of an LSA instruction which
+// is off by one.
+static DecodeStatus DecodeLSAImm(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeInsSize(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -237,7 +313,8 @@ static DecodeStatus readInstruction32(const MemoryObject &region,
uint64_t address,
uint64_t &size,
uint32_t &insn,
- bool isBigEndian) {
+ bool isBigEndian,
+ bool IsMicroMips) {
uint8_t Bytes[4];
// We want to read exactly 4 Bytes of data.
@@ -255,10 +332,20 @@ static DecodeStatus readInstruction32(const MemoryObject &region,
}
else {
    // Encoded as a little-endian 32-bit word in the stream.
- insn = (Bytes[0] << 0) |
- (Bytes[1] << 8) |
- (Bytes[2] << 16) |
- (Bytes[3] << 24);
+ // Little-endian byte ordering:
+ // mips32r2: 4 | 3 | 2 | 1
+ // microMIPS: 2 | 1 | 4 | 3
+ if (IsMicroMips) {
+ insn = (Bytes[2] << 0) |
+ (Bytes[3] << 8) |
+ (Bytes[0] << 16) |
+ (Bytes[1] << 24);
+ } else {
+ insn = (Bytes[0] << 0) |
+ (Bytes[1] << 8) |
+ (Bytes[2] << 16) |
+ (Bytes[3] << 24);
+ }
}
return MCDisassembler::Success;
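To make the ordering comment above concrete: a little-endian 32-bit word 0x11223344 is stored as the bytes 44 33 22 11 on mips32r2 but as 22 11 44 33 under microMIPS, where each 16-bit half is little-endian and the high half comes first. A standalone restatement of the reassembly, for illustration only:

#include <cstdint>

// Rebuild a 32-bit instruction from a little-endian byte stream, honouring
// the microMIPS halfword-swapped layout described above.
static uint32_t assembleLittleEndian32(const uint8_t Bytes[4], bool IsMicroMips) {
  if (IsMicroMips)
    return (Bytes[2] << 0) | (Bytes[3] << 8) | (Bytes[0] << 16) |
           (Bytes[1] << 24);
  return (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
         (Bytes[3] << 24);
}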
@@ -274,10 +361,21 @@ MipsDisassembler::getInstruction(MCInst &instr,
uint32_t Insn;
DecodeStatus Result = readInstruction32(Region, Address, Size,
- Insn, isBigEndian);
+ Insn, isBigEndian, IsMicroMips);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
+ if (IsMicroMips) {
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMicroMips32, instr, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ return MCDisassembler::Fail;
+ }
+
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
this, STI);
@@ -299,7 +397,7 @@ Mips64Disassembler::getInstruction(MCInst &instr,
uint32_t Insn;
DecodeStatus Result = readInstruction32(Region, Address, Size,
- Insn, isBigEndian);
+ Insn, isBigEndian, false);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
@@ -359,10 +457,20 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodePtrRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (static_cast<const MipsDisassembler *>(Decoder)->isN64())
+ return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder);
+
+ return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder);
}
@@ -390,6 +498,18 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFGRH32RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::FGRH32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -434,6 +554,58 @@ static DecodeStatus DecodeMem(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
+ unsigned Reg = fieldFromInstruction(Insn, 6, 5);
+ unsigned Base = fieldFromInstruction(Insn, 11, 5);
+
+ Reg = getReg(Decoder, Mips::MSA128BRegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
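The three fieldFromInstruction calls above can be written out with plain shifts and masks; a hypothetical equivalent, useful only to see where each field lives in the word:

#include <cstdint>

// Fields read by DecodeMSA128Mem: a 10-bit signed offset at bit 16,
// the data register at bit 6 and the base register at bit 11.
struct Msa128MemFields {
  int32_t Offset;
  unsigned Reg;
  unsigned Base;
};

static Msa128MemFields extractMsa128MemFields(uint32_t Insn) {
  int32_t Offset = static_cast<int32_t>((Insn >> 16) & 0x3ff);
  if (Offset & 0x200) // sign bit of the 10-bit field
    Offset -= 0x400;
  return { Offset, (Insn >> 6) & 0x1f, (Insn >> 11) & 0x1f };
}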
+
+static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<12>(Insn & 0x0fff);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemMMImm16(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeFMem(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -477,38 +649,98 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::ACC64DSPRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::HIRegsDSPRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::HI32DSPRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::LORegsDSPRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::LO32DSPRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSA128BRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSA128HRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSA128WRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSA128DRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSACtrlRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
@@ -533,6 +765,24 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned BranchOffset = Offset & 0xffff;
+ BranchOffset = SignExtend32<18>(BranchOffset << 1);
+ Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ return MCDisassembler::Success;
+}
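A field value of 0x0100, for example, decodes to a byte offset of 0x200. The same arithmetic restated as a free function (hypothetical helper, reusing the existing SignExtend32 from MathExtras.h):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// The 16-bit branch field counts halfwords, so double it and sign-extend
// the 18-bit result, exactly as DecodeBranchTargetMM does above.
static int32_t microMipsBranchByteOffset(uint32_t Field) {
  return llvm::SignExtend32<18>((Field & 0xffff) << 1);
}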
+
+static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 1;
+ Inst.addOperand(MCOperand::CreateImm(JumpOffset));
+ return MCDisassembler::Success;
+}
static DecodeStatus DecodeSimm16(MCInst &Inst,
unsigned Insn,
@@ -542,6 +792,15 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeLSAImm(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // We add one to the immediate field as it was encoded as 'imm - 1'.
+ Inst.addOperand(MCOperand::CreateImm(Insn + 1));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeInsSize(MCInst &Inst,
unsigned Insn,
uint64_t Address,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 369fece..7884589 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -184,6 +184,15 @@ void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
printOperand(MI, opNum, O);
}
+void MipsInstPrinter::printUnsignedImm8(const MCInst *MI, int opNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)(unsigned char)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
void MipsInstPrinter::
printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
@@ -211,6 +220,11 @@ printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
O << MipsFCCToString((Mips::CondCode)MO.getImm());
}
+void MipsInstPrinter::
+printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) {
+ llvm_unreachable("TODO");
+}
+
bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
unsigned OpNo, raw_ostream &OS) {
OS << "\t" << Str << "\t";
@@ -230,8 +244,11 @@ bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
switch (MI.getOpcode()) {
case Mips::BEQ:
+ // beq $zero, $zero, $L2 => b $L2
// beq $r0, $zero, $L2 => beqz $r0, $L2
- return isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
+ return (isReg<Mips::ZERO>(MI, 0) && isReg<Mips::ZERO>(MI, 1) &&
+ printAlias("b", MI, 2, OS)) ||
+ (isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS));
case Mips::BEQ64:
// beq $r0, $zero, $L2 => beqz $r0, $L2
return isReg<Mips::ZERO_64>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
@@ -257,6 +274,7 @@ bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
// jalr $ra, $r1 => jalr $r1
return isReg<Mips::RA_64>(MI, 0) && printAlias("jalr", MI, 1, OS);
case Mips::NOR:
+ case Mips::NOR_MM:
// nor $r0, $r1, $zero => not $r0, $r1
return isReg<Mips::ZERO>(MI, 2) && printAlias("not", MI, 0, 1, OS);
case Mips::NOR64:
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 1253ab0..f75ae24 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -93,9 +93,11 @@ public:
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm8(const MCInst *MI, int opNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O);
bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
raw_ostream &OS);
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 1f08789..9116748 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -5,7 +5,7 @@ add_llvm_library(LLVMMipsDesc
MipsMCTargetDesc.cpp
MipsELFObjectWriter.cpp
MipsReginfo.cpp
- MipsELFStreamer.cpp
+ MipsTargetStreamer.cpp
)
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 0b13607..3e70b23 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -45,6 +45,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Mips::fixup_Mips_GOT_DISP:
case Mips::fixup_Mips_GOT_LO16:
case Mips::fixup_Mips_CALL_LO16:
+ case Mips::fixup_MICROMIPS_LO16:
+ case Mips::fixup_MICROMIPS_GOT_PAGE:
+ case Mips::fixup_MICROMIPS_GOT_OFST:
+ case Mips::fixup_MICROMIPS_GOT_DISP:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -65,6 +69,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Mips::fixup_Mips_GOT_Local:
case Mips::fixup_Mips_GOT_HI16:
case Mips::fixup_Mips_CALL_HI16:
+ case Mips::fixup_MICROMIPS_HI16:
// Get the 2nd 16-bits. Also add 1 if bit 15 is 1.
Value = ((Value + 0x8000) >> 16) & 0xffff;
break;
@@ -76,6 +81,13 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// Get the 4th 16-bits.
Value = ((Value + 0x800080008000LL) >> 48) & 0xffff;
break;
+ case Mips::fixup_MICROMIPS_26_S1:
+ Value >>= 1;
+ break;
+ case Mips::fixup_MICROMIPS_PC16_S1:
+ Value -= 4;
+ Value >>= 1;
+ break;
}
return Value;
@@ -188,7 +200,20 @@ public:
{ "fixup_Mips_GOT_HI16", 0, 16, 0 },
{ "fixup_Mips_GOT_LO16", 0, 16, 0 },
{ "fixup_Mips_CALL_HI16", 0, 16, 0 },
- { "fixup_Mips_CALL_LO16", 0, 16, 0 }
+ { "fixup_Mips_CALL_LO16", 0, 16, 0 },
+ { "fixup_MICROMIPS_26_S1", 0, 26, 0 },
+ { "fixup_MICROMIPS_HI16", 0, 16, 0 },
+ { "fixup_MICROMIPS_LO16", 0, 16, 0 },
+ { "fixup_MICROMIPS_GOT16", 0, 16, 0 },
+ { "fixup_MICROMIPS_PC16_S1", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MICROMIPS_CALL16", 0, 16, 0 },
+ { "fixup_MICROMIPS_GOT_DISP", 0, 16, 0 },
+ { "fixup_MICROMIPS_GOT_PAGE", 0, 16, 0 },
+ { "fixup_MICROMIPS_GOT_OFST", 0, 16, 0 },
+ { "fixup_MICROMIPS_TLS_DTPREL_HI16", 0, 16, 0 },
+ { "fixup_MICROMIPS_TLS_DTPREL_LO16", 0, 16, 0 },
+ { "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 },
+ { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -253,25 +278,33 @@ public:
} // namespace
// MCAsmBackend
-MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new MipsAsmBackend(T, Triple(TT).getOS(),
/*IsLittle*/true, /*Is64Bit*/false);
}
-MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new MipsAsmBackend(T, Triple(TT).getOS(),
/*IsLittle*/false, /*Is64Bit*/false);
}
-MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new MipsAsmBackend(T, Triple(TT).getOS(),
/*IsLittle*/true, /*Is64Bit*/true);
}
-MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new MipsAsmBackend(T, Triple(TT).getOS(),
/*IsLittle*/false, /*Is64Bit*/true);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 6471b51..83c7d4b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -183,6 +183,45 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_Mips_CALL_LO16:
Type = ELF::R_MIPS_CALL_LO16;
break;
+ case Mips::fixup_MICROMIPS_26_S1:
+ Type = ELF::R_MICROMIPS_26_S1;
+ break;
+ case Mips::fixup_MICROMIPS_HI16:
+ Type = ELF::R_MICROMIPS_HI16;
+ break;
+ case Mips::fixup_MICROMIPS_LO16:
+ Type = ELF::R_MICROMIPS_LO16;
+ break;
+ case Mips::fixup_MICROMIPS_GOT16:
+ Type = ELF::R_MICROMIPS_GOT16;
+ break;
+ case Mips::fixup_MICROMIPS_PC16_S1:
+ Type = ELF::R_MICROMIPS_PC16_S1;
+ break;
+ case Mips::fixup_MICROMIPS_CALL16:
+ Type = ELF::R_MICROMIPS_CALL16;
+ break;
+ case Mips::fixup_MICROMIPS_GOT_DISP:
+ Type = ELF::R_MICROMIPS_GOT_DISP;
+ break;
+ case Mips::fixup_MICROMIPS_GOT_PAGE:
+ Type = ELF::R_MICROMIPS_GOT_PAGE;
+ break;
+ case Mips::fixup_MICROMIPS_GOT_OFST:
+ Type = ELF::R_MICROMIPS_GOT_OFST;
+ break;
+ case Mips::fixup_MICROMIPS_TLS_DTPREL_HI16:
+ Type = ELF::R_MICROMIPS_TLS_DTPREL_HI16;
+ break;
+ case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16:
+ Type = ELF::R_MICROMIPS_TLS_DTPREL_LO16;
+ break;
+ case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
+ Type = ELF::R_MICROMIPS_TLS_TPREL_HI16;
+ break;
+ case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
+ Type = ELF::R_MICROMIPS_TLS_TPREL_LO16;
+ break;
}
return Type;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
deleted file mode 100644
index cfcb877..0000000
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-//===-- MipsELFStreamer.cpp - MipsELFStreamer ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===-------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsELFStreamer.h"
-#include "MipsSubtarget.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELF.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
- MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack) {
- MipsELFStreamer *S = new MipsELFStreamer(Context, TAB, OS, Emitter,
- RelaxAll, NoExecStack);
- return S;
- }
-
- // For llc. Set a group of ELF header flags
- void
- MipsELFStreamer::emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget) {
-
- if (hasRawTextSupport())
- return;
-
- // Update e_header flags
- MCAssembler& MCA = getAssembler();
- unsigned EFlags = MCA.getELFHeaderEFlags();
-
- // TODO: Need to add -mabicalls and -mno-abicalls flags.
- // Currently we assume that -mabicalls is the default.
- EFlags |= ELF::EF_MIPS_CPIC;
-
- if (Subtarget.inMips16Mode())
- EFlags |= ELF::EF_MIPS_ARCH_ASE_M16;
- else
- EFlags |= ELF::EF_MIPS_NOREORDER;
-
- // Architecture
- if (Subtarget.hasMips64r2())
- EFlags |= ELF::EF_MIPS_ARCH_64R2;
- else if (Subtarget.hasMips64())
- EFlags |= ELF::EF_MIPS_ARCH_64;
- else if (Subtarget.hasMips32r2())
- EFlags |= ELF::EF_MIPS_ARCH_32R2;
- else
- EFlags |= ELF::EF_MIPS_ARCH_32;
-
- if (Subtarget.inMicroMipsMode())
- EFlags |= ELF::EF_MIPS_MICROMIPS;
-
- // ABI
- if (Subtarget.isABI_O32())
- EFlags |= ELF::EF_MIPS_ABI_O32;
-
- // Relocation Model
- Reloc::Model RM = Subtarget.getRelocationModel();
- if (RM == Reloc::PIC_ || RM == Reloc::Default)
- EFlags |= ELF::EF_MIPS_PIC;
- else if (RM == Reloc::Static)
- ; // Do nothing for Reloc::Static
- else
- llvm_unreachable("Unsupported relocation model for e_flags");
-
- MCA.setELFHeaderEFlags(EFlags);
- }
-
- // For llc. Set a symbol's STO flags
- void
- MipsELFStreamer::emitMipsSTOCG(const MipsSubtarget &Subtarget,
- MCSymbol *Sym,
- unsigned Val) {
-
- if (hasRawTextSupport())
- return;
-
- MCSymbolData &Data = getOrCreateSymbolData(Sym);
- // The "other" values are stored in the last 6 bits of the second byte
- // The traditional defines for STO values assume the full byte and thus
- // the shift to pack it.
- MCELF::setOther(Data, Val >> 2);
- }
-
-} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
deleted file mode 100644
index b10ccc7..0000000
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//=== MipsELFStreamer.h - MipsELFStreamer ------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENCE.TXT for details.
-//
-//===-------------------------------------------------------------------===//
-#ifndef MIPSELFSTREAMER_H_
-#define MIPSELFSTREAMER_H_
-
-#include "llvm/MC/MCELFStreamer.h"
-
-namespace llvm {
-class MipsAsmPrinter;
-class MipsSubtarget;
-class MCSymbol;
-
-class MipsELFStreamer : public MCELFStreamer {
-public:
- MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack)
- : MCELFStreamer(SK_MipsELFStreamer, Context, TAB, OS, Emitter) {
- }
-
- ~MipsELFStreamer() {}
- void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget);
- void emitMipsSTOCG(const MipsSubtarget &Subtarget,
- MCSymbol *Sym,
- unsigned Val);
-
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_MipsELFStreamer;
- }
-};
-
- MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack);
-}
-
-#endif /* MIPSELFSTREAMER_H_ */
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index f963900..6ed44b7 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -128,6 +128,45 @@ namespace Mips {
// resulting in - R_MIPS_CALL_LO16
fixup_Mips_CALL_LO16,
+ // resulting in - R_MICROMIPS_26_S1
+ fixup_MICROMIPS_26_S1,
+
+ // resulting in - R_MICROMIPS_HI16
+ fixup_MICROMIPS_HI16,
+
+ // resulting in - R_MICROMIPS_LO16
+ fixup_MICROMIPS_LO16,
+
+ // resulting in - R_MICROMIPS_GOT16
+ fixup_MICROMIPS_GOT16,
+
+ // resulting in - R_MICROMIPS_PC16_S1
+ fixup_MICROMIPS_PC16_S1,
+
+ // resulting in - R_MICROMIPS_CALL16
+ fixup_MICROMIPS_CALL16,
+
+ // resulting in - R_MICROMIPS_GOT_DISP
+ fixup_MICROMIPS_GOT_DISP,
+
+ // resulting in - R_MICROMIPS_GOT_PAGE
+ fixup_MICROMIPS_GOT_PAGE,
+
+ // resulting in - R_MICROMIPS_GOT_OFST
+ fixup_MICROMIPS_GOT_OFST,
+
+ // resulting in - R_MICROMIPS_TLS_DTPREL_HI16
+ fixup_MICROMIPS_TLS_DTPREL_HI16,
+
+ // resulting in - R_MICROMIPS_TLS_DTPREL_LO16
+ fixup_MICROMIPS_TLS_DTPREL_LO16,
+
+ // resulting in - R_MICROMIPS_TLS_TPREL_HI16
+ fixup_MICROMIPS_TLS_TPREL_HI16,
+
+ // resulting in - R_MICROMIPS_TLS_TPREL_LO16
+ fixup_MICROMIPS_TLS_TPREL_LO16,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 33f6f96..6aa3c76 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -38,7 +38,6 @@ MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
GPRel64Directive = "\t.gpdword\t";
- WeakRefDirective = "\t.weak\t";
DebugLabelSuffix = "=.";
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 772234e..1000113 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -14,12 +14,12 @@
#ifndef MIPSTARGETASMINFO_H
#define MIPSTARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
- class MipsMCAsmInfo : public MCAsmInfo {
+ class MipsMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit MipsMCAsmInfo(StringRef TT);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 4dc6917..66428bd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -39,11 +39,14 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
const MCSubtargetInfo &STI;
bool IsLittleEndian;
+ bool IsMicroMips;
public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
const MCSubtargetInfo &sti, bool IsLittle) :
- MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {}
+ MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {
+ IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
+ }
~MipsMCCodeEmitter() {}
@@ -53,9 +56,17 @@ public:
void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const {
// Output the instruction encoding in little endian byte order.
- for (unsigned i = 0; i < Size; ++i) {
- unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
- EmitByte((Val >> Shift) & 0xff, OS);
+ // Little-endian byte ordering:
+ // mips32r2: 4 | 3 | 2 | 1
+ // microMIPS: 2 | 1 | 4 | 3
+ if (IsLittleEndian && Size == 4 && IsMicroMips) {
+ EmitInstruction(Val>>16, 2, OS);
+ EmitInstruction(Val, 2, OS);
+ } else {
+ for (unsigned i = 0; i < Size; ++i) {
+ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+ EmitByte((Val >> Shift) & 0xff, OS);
+ }
}
}
@@ -73,12 +84,24 @@ public:
unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+  // getJumpTargetOpValueMM - Return binary encoding of the microMIPS jump
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
// getBranchTargetOpValue - Return binary encoding of the branch
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+  // getBranchTargetOpValueMM - Return binary encoding of the microMIPS branch
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
  // getMachineOpValue - Return binary encoding of operand. If the machine
// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
@@ -86,11 +109,17 @@ public:
unsigned getMemEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ // getLSAImmEncoding - Return binary encoding of LSA immediate.
+ unsigned getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
unsigned
getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const;
@@ -142,6 +171,9 @@ static void LowerLargeShift(MCInst& Inst) {
case Mips::DSRA:
Inst.setOpcode(Mips::DSRA32);
return;
+ case Mips::DROTR:
+ Inst.setOpcode(Mips::DROTR32);
+ return;
}
}
@@ -177,7 +209,7 @@ static void LowerDextDins(MCInst& InstIn) {
}
/// EncodeInstruction - Emit the instruction.
-/// Size the instruction (currently only 4 bytes
+/// Size the instruction with Desc.getSize().
void MipsMCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const
@@ -193,6 +225,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case Mips::DSLL:
case Mips::DSRL:
case Mips::DSRA:
+ case Mips::DROTR:
LowerLargeShift(TmpInst);
break;
// Double extract instruction is chosen by pos and size operands
@@ -201,6 +234,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
LowerDextDins(TmpInst);
}
+ unsigned long N = Fixups.size();
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups);
// Check for unimplemented opcodes.
@@ -213,6 +247,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (STI.getFeatureBits() & Mips::FeatureMicroMips) {
int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips);
if (NewOpcode != -1) {
+ if (Fixups.size() > N)
+ Fixups.pop_back();
Opcode = NewOpcode;
TmpInst.setOpcode (NewOpcode);
Binary = getBinaryCodeForInstr(TmpInst, Fixups);
@@ -250,6 +286,28 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
+/// getBranchTargetOpValueMM - Return binary encoding of the microMIPS branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 2.
+ if (MO.isImm()) return MO.getImm() >> 1;
+
+ assert(MO.isExpr() &&
+ "getBranchTargetOpValueMM expects only expressions or immediates");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::
+ fixup_MICROMIPS_PC16_S1)));
+ return 0;
+}
+
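As a rough aside, the halving above reflects that microMIPS instructions are 2-byte aligned, so a resolved branch displacement is stored in halfword units while a symbolic target falls back to the fixup_MICROMIPS_PC16_S1 fixup. A minimal standalone sketch of that scaling (the helper name and the 16-bit mask are illustrative assumptions, not LLVM APIs):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Encode a resolved microMIPS branch displacement (in bytes) as a
// 16-bit PC-relative field expressed in halfwords, mirroring the
// MO.getImm() >> 1 step above.
static uint32_t encodeMicroMipsBranchImm(int32_t ByteOffset) {
  assert((ByteOffset & 1) == 0 && "microMIPS branch targets are 2-byte aligned");
  return static_cast<uint32_t>(ByteOffset >> 1) & 0xFFFF;
}

int main() {
  std::printf("0x%04x\n", encodeMicroMipsBranchImm(64)); // 0x0020
  std::printf("0x%04x\n", encodeMicroMipsBranchImm(-4)); // 0xfffe
  return 0;
}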
/// getJumpTargetOpValue - Return binary encoding of the jump
/// target operand. If the machine operand requires relocation,
/// record the relocation and return zero.
@@ -271,6 +329,23 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ // If the destination is an immediate, divide by 2.
+ if (MO.isImm()) return MO.getImm() >> 1;
+
+ assert(MO.isExpr() &&
+ "getJumpTargetOpValueMM expects only expressions or an immediate");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MICROMIPS_26_S1)));
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
int64_t Res;
@@ -300,31 +375,39 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
FixupKind = Mips::fixup_Mips_GPOFF_LO;
break;
case MCSymbolRefExpr::VK_Mips_GOT_PAGE :
- FixupKind = Mips::fixup_Mips_GOT_PAGE;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_PAGE
+ : Mips::fixup_Mips_GOT_PAGE;
break;
case MCSymbolRefExpr::VK_Mips_GOT_OFST :
- FixupKind = Mips::fixup_Mips_GOT_OFST;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_OFST
+ : Mips::fixup_Mips_GOT_OFST;
break;
case MCSymbolRefExpr::VK_Mips_GOT_DISP :
- FixupKind = Mips::fixup_Mips_GOT_DISP;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_DISP
+ : Mips::fixup_Mips_GOT_DISP;
break;
case MCSymbolRefExpr::VK_Mips_GPREL:
FixupKind = Mips::fixup_Mips_GPREL16;
break;
case MCSymbolRefExpr::VK_Mips_GOT_CALL:
- FixupKind = Mips::fixup_Mips_CALL16;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_CALL16
+ : Mips::fixup_Mips_CALL16;
break;
case MCSymbolRefExpr::VK_Mips_GOT16:
- FixupKind = Mips::fixup_Mips_GOT_Global;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT16
+ : Mips::fixup_Mips_GOT_Global;
break;
case MCSymbolRefExpr::VK_Mips_GOT:
- FixupKind = Mips::fixup_Mips_GOT_Local;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT16
+ : Mips::fixup_Mips_GOT_Local;
break;
case MCSymbolRefExpr::VK_Mips_ABS_HI:
- FixupKind = Mips::fixup_Mips_HI16;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_HI16
+ : Mips::fixup_Mips_HI16;
break;
case MCSymbolRefExpr::VK_Mips_ABS_LO:
- FixupKind = Mips::fixup_Mips_LO16;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_LO16
+ : Mips::fixup_Mips_LO16;
break;
case MCSymbolRefExpr::VK_Mips_TLSGD:
FixupKind = Mips::fixup_Mips_TLSGD;
@@ -333,19 +416,23 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
FixupKind = Mips::fixup_Mips_TLSLDM;
break;
case MCSymbolRefExpr::VK_Mips_DTPREL_HI:
- FixupKind = Mips::fixup_Mips_DTPREL_HI;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_DTPREL_HI16
+ : Mips::fixup_Mips_DTPREL_HI;
break;
case MCSymbolRefExpr::VK_Mips_DTPREL_LO:
- FixupKind = Mips::fixup_Mips_DTPREL_LO;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_DTPREL_LO16
+ : Mips::fixup_Mips_DTPREL_LO;
break;
case MCSymbolRefExpr::VK_Mips_GOTTPREL:
FixupKind = Mips::fixup_Mips_GOTTPREL;
break;
case MCSymbolRefExpr::VK_Mips_TPREL_HI:
- FixupKind = Mips::fixup_Mips_TPREL_HI;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_TPREL_HI16
+ : Mips::fixup_Mips_TPREL_HI;
break;
case MCSymbolRefExpr::VK_Mips_TPREL_LO:
- FixupKind = Mips::fixup_Mips_TPREL_LO;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_TPREL_LO16
+ : Mips::fixup_Mips_TPREL_LO;
break;
case MCSymbolRefExpr::VK_Mips_HIGHER:
FixupKind = Mips::fixup_Mips_HIGHER;
@@ -406,6 +493,17 @@ MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
return (OffBits & 0xFFFF) | RegBits;
}
+unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 11-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups);
+
+ return (OffBits & 0x0FFF) | RegBits;
+}
+
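The packing above follows the usual microMIPS load/store layout: the 5-bit base-register encoding sits in bits 20-16 and a signed 12-bit offset in bits 11-0. A self-contained sketch under that assumption (the helper is hypothetical, and BaseReg stands for the already-encoded register number that getMachineOpValue would normally produce):

#include <cstdint>
#include <cstdio>

// Pack (encoded base register, signed 12-bit offset) the same way as
// getMemEncodingMMImm12: (offset & 0xFFF) | (reg << 16).
static uint32_t packMMImm12(unsigned BaseReg, int32_t Offset) {
  return (static_cast<uint32_t>(Offset) & 0x0FFF) |
         (static_cast<uint32_t>(BaseReg) << 16);
}

int main() {
  // $sp is register 29; an offset of -8 wraps into the low 12 bits.
  std::printf("0x%08x\n", packMMImm12(29, -8)); // 0x001d0ff8
  return 0;
}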
unsigned
MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -427,5 +525,13 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
return Position + Size - 1;
}
+unsigned
+MipsMCCodeEmitter::getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpNo).isImm());
+ // The immediate is encoded as 'immediate - 1'.
+ return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups) - 1;
+}
+
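The '- 1' above matches the usual LSA semantics, where the shift amount ranges over 1..4 and the 2-bit field stores one less than that value; a tiny sketch under that assumption (the helper name is hypothetical):

#include <cassert>
#include <cstdio>

// Mirror getLSAImmEncoding: shift amounts 1..4 are stored as 0..3.
static unsigned encodeLSAImm(unsigned ShiftAmount) {
  assert(ShiftAmount >= 1 && ShiftAmount <= 4 && "lsa shift amount is 1..4");
  return ShiftAmount - 1;
}

int main() {
  for (unsigned S = 1; S <= 4; ++S)
    std::printf("sa=%u -> field=%u\n", S, encodeLSAImm(S));
  return 0;
}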
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 837fabe..5548aaa 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -11,17 +11,21 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsELFStreamer.h"
#include "MipsMCTargetDesc.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MipsMCAsmInfo.h"
+#include "MipsTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -125,14 +129,23 @@ static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
}
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ MipsTargetELFStreamer *S = new MipsTargetELFStreamer();
+ return createELFStreamer(Context, S, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCStreamer *
+createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory, MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) {
+ MipsTargetAsmStreamer *S = new MipsTargetAsmStreamer(OS);
+
+ return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, InstPrint, CE, TAB,
+ ShowInst);
}
extern "C" void LLVMInitializeMipsTargetMC() {
@@ -183,6 +196,12 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget,
createMCStreamer);
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(TheMipsTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheMipselTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer);
+
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
createMipsAsmBackendEB32);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 71954a4..eabebfe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -42,14 +42,14 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createMipsAsmBackendEB32(const Target &T, StringRef TT,
- StringRef CPU);
-MCAsmBackend *createMipsAsmBackendEL32(const Target &T, StringRef TT,
- StringRef CPU);
-MCAsmBackend *createMipsAsmBackendEB64(const Target &T, StringRef TT,
- StringRef CPU);
-MCAsmBackend *createMipsAsmBackendEL64(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEB32(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEL32(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEB64(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEL64(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS,
uint8_t OSABI,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
new file mode 100644
index 0000000..5e90bbc
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -0,0 +1,67 @@
+//===-- MipsTargetStreamer.cpp - Mips Target Streamer Methods -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetStreamer.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> PrintHackDirectives("print-hack-directives",
+ cl::init(false), cl::Hidden);
+
+// pin vtable to this file
+void MipsTargetStreamer::anchor() {}
+
+MipsTargetAsmStreamer::MipsTargetAsmStreamer(formatted_raw_ostream &OS)
+ : OS(OS) {}
+
+void MipsTargetAsmStreamer::emitMipsHackELFFlags(unsigned Flags) {
+ if (!PrintHackDirectives)
+ return;
+
+ OS << "\t.mips_hack_elf_flags 0x";
+ OS.write_hex(Flags);
+ OS << '\n';
+}
+void MipsTargetAsmStreamer::emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val) {
+ if (!PrintHackDirectives)
+ return;
+
+ OS << "\t.mips_hack_stocg ";
+ OS << Sym->getName();
+ OS << ", ";
+ OS << Val;
+ OS << '\n';
+}
+
+MCELFStreamer &MipsTargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(*Streamer);
+}
+
+void MipsTargetELFStreamer::emitMipsHackELFFlags(unsigned Flags) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCA.setELFHeaderEFlags(Flags);
+}
+
+// Set a symbol's STO flags
+void MipsTargetELFStreamer::emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val) {
+ MCSymbolData &Data = getStreamer().getOrCreateSymbolData(Sym);
+ // The "other" values are stored in the last 6 bits of the second byte
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ MCELF::setOther(Data, Val >> 2);
+}
diff --git a/lib/Target/Mips/MSA.txt b/lib/Target/Mips/MSA.txt
new file mode 100644
index 0000000..d1c4193
--- /dev/null
+++ b/lib/Target/Mips/MSA.txt
@@ -0,0 +1,78 @@
+Code Generation Notes for MSA
+=============================
+
+Intrinsics are lowered to SelectionDAG nodes where possible in order to enable
+optimisation, reduce the size of the ISel matcher, and reduce repetition in
+the implementation. In a small number of cases, this can cause different
+(semantically equivalent) instructions to be used in place of the requested
+instruction, even when no optimisation has taken place.
+
+Instructions
+============
+
+This section describes any quirks of instruction selection for MSA. For
+example, two instructions might be equally valid for some given IR and one is
+chosen in preference to the other.
+
+bclri.b:
+ It is not possible to emit bclri.b since andi.b covers exactly the
+ same cases. andi.b should use fractionally less power than bclri.b in
+ most hardware implementations so it is used in preference to bclri.b.
+
+vshf.w:
+ It is not possible to emit vshf.w when the shuffle description is
+ constant since shf.w covers exactly the same cases. shf.w is used
+ instead. It is also impossible for the shuffle description to be
+ unknown at compile-time due to the definition of shufflevector in
+ LLVM IR.
+
+vshf.[bhwd]:
+ When the shuffle description describes a splat operation, splat.[bhwd]
+ instructions will be selected instead of vshf.[bhwd]. Unlike the ilv*,
+ and pck* instructions, this is matched from MipsISD::VSHF instead of
+ a special-case MipsISD node.
+
+ilvl.d, pckev.d:
+ It is not possible to emit ilvl.d, or pckev.d since ilvev.d covers the
+ same shuffle. ilvev.d will be emitted instead.
+
+ilvr.d, ilvod.d, pckod.d:
+ It is not possible to emit ilvr.d, or pckod.d since ilvod.d covers the
+ same shuffle. ilvod.d will be emitted instead.
+
+splat.[bhwd]:
+ The intrinsic will work as expected. However, unlike other intrinsics
+ it lowers directly to MipsISD::VSHF instead of using common IR.
+
+splati.w:
+ It is not possible to emit splati.w since shf.w covers the same cases.
+ shf.w will be emitted instead.
+
+copy_s.w:
+ On MIPS32, the copy_u.d intrinsic will emit this instruction instead of
+ copy_u.w. This is semantically equivalent since the general-purpose
+ register file is 32 bits wide.
+
+binsri.[bhwd], binsli.[bhwd]:
+ These two operations are equivalent to each other with the operands
+ swapped and condition inverted. The compiler may use either one as
+ appropriate.
+ Furthermore, the compiler may use bsel.[bhwd] for some masks that do
+ not survive the legalization process (this is a bug and will be fixed).
+
+bmnz.v, bmz.v, bsel.v:
+ These three operations differ only in the operand that is tied to the
+ result.
+ It is (currently) not possible to emit bmz.v, or bsel.v since bmnz.v is
+ the same operation and will be emitted instead.
+ In future, the compiler may choose between these three instructions
+ according to register allocation.
+
+bmnzi.b, bmzi.b:
+ Like their non-immediate counterparts, bmnzi.b and bmzi.b are the same
+ operation with the operands swapped. bmnzi.b will (currently) be emitted
+ for both cases.
+
+bseli.v:
+ Unlike the non-immediate versions, bseli.v is distinguishable from
+ bmnzi.b and bmzi.b and can be emitted.
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
index 665b4d2..c12a32e 100644
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -39,8 +39,8 @@ class SLTI_FM_MM<bits<6> op> : MMArch {
bits<32> Inst;
let Inst{31-26} = op;
- let Inst{25-21} = rs;
- let Inst{20-16} = rt;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
let Inst{15-0} = imm16;
}
@@ -110,3 +110,195 @@ class LW_FM_MM<bits<6> op> : MMArch {
let Inst{20-16} = addr{20-16};
let Inst{15-0} = addr{15-0};
}
+
+class LWL_FM_MM<bits<4> funct> {
+ bits<5> rt;
+ bits<21> addr;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x18;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-12} = funct;
+ let Inst{11-0} = addr{11-0};
+}
+
+class CMov_F_I_FM_MM<bits<7> func> : MMArch {
+ bits<5> rd;
+ bits<5> rs;
+ bits<3> fcc;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x15;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rs;
+ let Inst{15-13} = fcc;
+ let Inst{12-6} = func;
+ let Inst{5-0} = 0x3b;
+}
+
+class MTLO_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = 0x00;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class MFLO_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = 0x00;
+ let Inst{20-16} = rd;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class CLO_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class SEB_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class EXT_FM_MM<bits<6> funct> : MMArch {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> pos;
+ bits<5> size;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = size;
+ let Inst{10-6} = pos;
+ let Inst{5-0} = funct;
+}
+
+class J_FM_MM<bits<6> op> : MMArch {
+ bits<26> target;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-0} = target;
+}
+
+class JR_FM_MM<bits<8> funct> : MMArch {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-21} = 0x00;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = 0x0;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class JALR_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rs;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class BEQ_FM_MM<bits<6> op> : MMArch {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
+class BGEZ_FM_MM<bits<5> funct> : MMArch {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
+class BGEZAL_FM_MM<bits<5> funct> : MMArch {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
+class TEQ_FM_MM<bits<6> funct> : MMArch {
+ bits<5> rs;
+ bits<5> rt;
+ bits<4> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-12} = code_;
+ let Inst{11-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class TEQI_FM_MM<bits<5> funct> : MMArch {
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm16;
+}
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 249d712..d9507fa 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1,4 +1,51 @@
-let isCodeGenOnly = 1 in {
+def addrimm12 : ComplexPattern<iPTR, 2, "selectIntAddrMM", [frameindex]>;
+
+def simm12 : Operand<i32> {
+ let DecoderMethod = "DecodeSimm12";
+}
+
+def mem_mm_12 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR32, simm12);
+ let EncoderMethod = "getMemEncodingMMImm12";
+ let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+def jmptarget_mm : Operand<OtherVT> {
+ let EncoderMethod = "getJumpTargetOpValueMM";
+}
+
+def calltarget_mm : Operand<iPTR> {
+ let EncoderMethod = "getJumpTargetOpValueMM";
+}
+
+def brtarget_mm : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValueMM";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTargetMM";
+}
+
+let canFoldAsLoad = 1 in
+class LoadLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
+ Operand MemOpnd> :
+ InstSE<(outs RO:$rt), (ins MemOpnd:$addr, RO:$src),
+ !strconcat(opstr, "\t$rt, $addr"),
+ [(set RO:$rt, (OpNode addrimm12:$addr, RO:$src))],
+ NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm12";
+ string Constraints = "$src = $rt";
+}
+
+class StoreLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
+ Operand MemOpnd>:
+ InstSE<(outs), (ins RO:$rt, MemOpnd:$addr),
+ !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RO:$rt, addrimm12:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm12";
+}
+
+let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
/// Arithmetic Instructions (ALU Immediate)
def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd>,
ADDI_FM_MM<0xc>;
@@ -32,17 +79,21 @@ let isCodeGenOnly = 1 in {
def XOR_MM : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, IIAlu, xor>,
ADD_FM_MM<0, 0x310>;
def NOR_MM : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM_MM<0, 0x2d0>;
- def MULT_MM : MMRel, Mult<"mult", IIImul, GPR32Opnd, [HI, LO]>,
+ def MULT_MM : MMRel, Mult<"mult", IIImul, GPR32Opnd, [HI0, LO0]>,
MULT_FM_MM<0x22c>;
- def MULTu_MM : MMRel, Mult<"multu", IIImul, GPR32Opnd, [HI, LO]>,
+ def MULTu_MM : MMRel, Mult<"multu", IIImul, GPR32Opnd, [HI0, LO0]>,
MULT_FM_MM<0x26c>;
+ def SDIV_MM : MMRel, Div<"div", IIIdiv, GPR32Opnd, [HI0, LO0]>,
+ MULT_FM_MM<0x2ac>;
+ def UDIV_MM : MMRel, Div<"divu", IIIdiv, GPR32Opnd, [HI0, LO0]>,
+ MULT_FM_MM<0x2ec>;
/// Shift Instructions
- def SLL_MM : MMRel, shift_rotate_imm<"sll", shamt, GPR32Opnd>,
+ def SLL_MM : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd>,
SRA_FM_MM<0, 0>;
- def SRL_MM : MMRel, shift_rotate_imm<"srl", shamt, GPR32Opnd>,
+ def SRL_MM : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd>,
SRA_FM_MM<0x40, 0>;
- def SRA_MM : MMRel, shift_rotate_imm<"sra", shamt, GPR32Opnd>,
+ def SRA_MM : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd>,
SRA_FM_MM<0x80, 0>;
def SLLV_MM : MMRel, shift_rotate_reg<"sllv", GPR32Opnd>,
SRLV_FM_MM<0x10, 0>;
@@ -50,18 +101,119 @@ let isCodeGenOnly = 1 in {
SRLV_FM_MM<0x50, 0>;
def SRAV_MM : MMRel, shift_rotate_reg<"srav", GPR32Opnd>,
SRLV_FM_MM<0x90, 0>;
- def ROTR_MM : MMRel, shift_rotate_imm<"rotr", shamt, GPR32Opnd>,
+ def ROTR_MM : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd>,
SRA_FM_MM<0xc0, 0>;
def ROTRV_MM : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd>,
SRLV_FM_MM<0xd0, 0>;
/// Load and Store Instructions - aligned
- defm LB_MM : LoadM<"lb", GPR32Opnd, sextloadi8>, MMRel, LW_FM_MM<0x7>;
- defm LBu_MM : LoadM<"lbu", GPR32Opnd, zextloadi8>, MMRel, LW_FM_MM<0x5>;
- defm LH_MM : LoadM<"lh", GPR32Opnd, sextloadi16>, MMRel, LW_FM_MM<0xf>;
- defm LHu_MM : LoadM<"lhu", GPR32Opnd, zextloadi16>, MMRel, LW_FM_MM<0xd>;
- defm LW_MM : LoadM<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>;
- defm SB_MM : StoreM<"sb", GPR32Opnd, truncstorei8>, MMRel, LW_FM_MM<0x6>;
- defm SH_MM : StoreM<"sh", GPR32Opnd, truncstorei16>, MMRel, LW_FM_MM<0xe>;
- defm SW_MM : StoreM<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
+ let DecoderMethod = "DecodeMemMMImm16" in {
+ def LB_MM : Load<"lb", GPR32Opnd>, MMRel, LW_FM_MM<0x7>;
+ def LBu_MM : Load<"lbu", GPR32Opnd>, MMRel, LW_FM_MM<0x5>;
+ def LH_MM : Load<"lh", GPR32Opnd>, MMRel, LW_FM_MM<0xf>;
+ def LHu_MM : Load<"lhu", GPR32Opnd>, MMRel, LW_FM_MM<0xd>;
+ def LW_MM : Load<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>;
+ def SB_MM : Store<"sb", GPR32Opnd>, MMRel, LW_FM_MM<0x6>;
+ def SH_MM : Store<"sh", GPR32Opnd>, MMRel, LW_FM_MM<0xe>;
+ def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
+ }
+
+ /// Load and Store Instructions - unaligned
+ def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12>,
+ LWL_FM_MM<0x0>;
+ def LWR_MM : LoadLeftRightMM<"lwr", MipsLWR, GPR32Opnd, mem_mm_12>,
+ LWL_FM_MM<0x1>;
+ def SWL_MM : StoreLeftRightMM<"swl", MipsSWL, GPR32Opnd, mem_mm_12>,
+ LWL_FM_MM<0x8>;
+ def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>,
+ LWL_FM_MM<0x9>;
+
+ /// Move Conditional
+ def MOVZ_I_MM : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd,
+ NoItinerary>, ADD_FM_MM<0, 0x58>;
+ def MOVN_I_MM : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd,
+ NoItinerary>, ADD_FM_MM<0, 0x18>;
+ def MOVT_I_MM : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, IIAlu>,
+ CMov_F_I_FM_MM<0x25>;
+ def MOVF_I_MM : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, IIAlu>,
+ CMov_F_I_FM_MM<0x5>;
+
+ /// Move to/from HI/LO
+ def MTHI_MM : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>,
+ MTLO_FM_MM<0x0b5>;
+ def MTLO_MM : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>,
+ MTLO_FM_MM<0x0f5>;
+ def MFHI_MM : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>,
+ MFLO_FM_MM<0x035>;
+ def MFLO_MM : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>,
+ MFLO_FM_MM<0x075>;
+
+ /// Multiply Add/Sub Instructions
+ def MADD_MM : MMRel, MArithR<"madd", 1>, MULT_FM_MM<0x32c>;
+ def MADDU_MM : MMRel, MArithR<"maddu", 1>, MULT_FM_MM<0x36c>;
+ def MSUB_MM : MMRel, MArithR<"msub">, MULT_FM_MM<0x3ac>;
+ def MSUBU_MM : MMRel, MArithR<"msubu">, MULT_FM_MM<0x3ec>;
+
+ /// Count Leading
+ def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM_MM<0x16c>;
+ def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM_MM<0x12c>;
+
+ /// Sign Ext In Register Instructions.
+ def SEB_MM : MMRel, SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM_MM<0x0ac>;
+ def SEH_MM : MMRel, SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM_MM<0x0ec>;
+
+ /// Word Swap Bytes Within Halfwords
+ def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>;
+
+ def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>,
+ EXT_FM_MM<0x2c>;
+ def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>,
+ EXT_FM_MM<0x0c>;
+
+ /// Jump Instructions
+ let DecoderMethod = "DecodeJumpTargetMM" in {
+ def J_MM : MMRel, JumpFJ<jmptarget_mm, "j", br, bb, "j">,
+ J_FM_MM<0x35>;
+ def JAL_MM : MMRel, JumpLink<"jal", calltarget_mm>, J_FM_MM<0x3d>;
+ def TAILCALL_MM : MMRel, JumpFJ<calltarget_mm, "j", MipsTailCall, imm,
+ "tcall">, J_FM_MM<0x3d>, IsTailCall;
+ }
+ def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>;
+ def JALR_MM : MMRel, JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>;
+ def TAILCALL_R_MM : MMRel, JumpFR<"tcallr", GPR32Opnd, MipsTailCall>,
+ JR_FM_MM<0x3c>, IsTailCall;
+ def RET_MM : MMRel, RetBase<"ret", GPR32Opnd>, JR_FM_MM<0x3c>;
+
+ /// Branch Instructions
+ def BEQ_MM : MMRel, CBranch<"beq", brtarget_mm, seteq, GPR32Opnd>,
+ BEQ_FM_MM<0x25>;
+ def BNE_MM : MMRel, CBranch<"bne", brtarget_mm, setne, GPR32Opnd>,
+ BEQ_FM_MM<0x2d>;
+ def BGEZ_MM : MMRel, CBranchZero<"bgez", brtarget_mm, setge, GPR32Opnd>,
+ BGEZ_FM_MM<0x2>;
+ def BGTZ_MM : MMRel, CBranchZero<"bgtz", brtarget_mm, setgt, GPR32Opnd>,
+ BGEZ_FM_MM<0x6>;
+ def BLEZ_MM : MMRel, CBranchZero<"blez", brtarget_mm, setle, GPR32Opnd>,
+ BGEZ_FM_MM<0x4>;
+ def BLTZ_MM : MMRel, CBranchZero<"bltz", brtarget_mm, setlt, GPR32Opnd>,
+ BGEZ_FM_MM<0x0>;
+ def BGEZAL_MM : MMRel, BGEZAL_FT<"bgezal", brtarget_mm, GPR32Opnd>,
+ BGEZAL_FM_MM<0x03>;
+ def BLTZAL_MM : MMRel, BGEZAL_FT<"bltzal", brtarget_mm, GPR32Opnd>,
+ BGEZAL_FM_MM<0x01>;
+
+ /// Trap Instructions
+ def TEQ_MM : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM_MM<0x0>;
+ def TGE_MM : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM_MM<0x08>;
+ def TGEU_MM : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM_MM<0x10>;
+ def TLT_MM : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM_MM<0x20>;
+ def TLTU_MM : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM_MM<0x28>;
+ def TNE_MM : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM_MM<0x30>;
+
+ def TEQI_MM : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM_MM<0x0e>;
+ def TGEI_MM : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM_MM<0x09>;
+ def TGEIU_MM : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM_MM<0x0b>;
+ def TLTI_MM : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM_MM<0x08>;
+ def TLTIU_MM : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM_MM<0x0a>;
+ def TNEI_MM : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM_MM<0x0c>;
}
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index b88c0d2..e796deb 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -28,7 +28,6 @@ namespace llvm {
FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
JITCodeEmitter &JCE);
FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
- FunctionPass *createMipsOptimizeMathLibCalls(MipsTargetMachine &TM);
} // end namespace llvm;
#endif
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 2595e41..b8e3f39 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -78,6 +78,8 @@ def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">;
def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true",
"Mips DSP-R2 ASE", [FeatureDSP]>;
+def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">;
+
def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
"microMips mode">;
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 54fdb78..8ce2ced 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -20,7 +20,7 @@ namespace llvm {
class Mips16FrameLowering : public MipsFrameLowering {
public:
explicit Mips16FrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI, 8) {}
+ : MipsFrameLowering(STI, STI.stackAlignment()) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 7e456aa..81bf18c 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <string>
static void inlineAsmOut
@@ -321,6 +322,37 @@ static void assureFPCallStub(Function &F, Module *M,
}
//
+// Functions that are llvm intrinsics and don't need helpers.
+//
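+// The table below is kept in lexicographic order because isIntrinsicInline
+// relies on std::binary_search over it.
+//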
+static const char *IntrinsicInline[] =
+ {"fabs",
+ "fabsf",
+ "llvm.ceil.f32", "llvm.ceil.f64",
+ "llvm.copysign.f32", "llvm.copysign.f64",
+ "llvm.cos.f32", "llvm.cos.f64",
+ "llvm.exp.f32", "llvm.exp.f64",
+ "llvm.exp2.f32", "llvm.exp2.f64",
+ "llvm.fabs.f32", "llvm.fabs.f64",
+ "llvm.floor.f32", "llvm.floor.f64",
+ "llvm.fma.f32", "llvm.fma.f64",
+ "llvm.log.f32", "llvm.log.f64",
+ "llvm.log10.f32", "llvm.log10.f64",
+ "llvm.nearbyint.f32", "llvm.nearbyint.f64",
+ "llvm.pow.f32", "llvm.pow.f64",
+ "llvm.powi.f32", "llvm.powi.f64",
+ "llvm.rint.f32", "llvm.rint.f64",
+ "llvm.round.f32", "llvm.round.f64",
+ "llvm.sin.f32", "llvm.sin.f64",
+ "llvm.sqrt.f32", "llvm.sqrt.f64",
+ "llvm.trunc.f32", "llvm.trunc.f64",
+ };
+
+static bool isIntrinsicInline(Function *F) {
+ return std::binary_search(
+ IntrinsicInline, array_endof(IntrinsicInline),
+ F->getName());
+}
+//
// Returns of float, double and complex need to be handled with a helper
// function.
//
@@ -372,7 +404,7 @@ static bool fixupFPReturnAndCall
// helper functions
if (Subtarget.getRelocationModel() != Reloc::PIC_ ) {
Function *F_ = CI->getCalledFunction();
- if (F_ && needsFPHelperFromSig(*F_)) {
+ if (F_ && !isIntrinsicInline(F_) && needsFPHelperFromSig(*F_)) {
assureFPCallStub(*F_, M, Subtarget);
Modified=true;
}
@@ -390,7 +422,7 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
std::string Name = F->getName();
std::string SectionName = ".mips16.fn." + Name;
std::string StubName = "__fn_stub_" + Name;
- std::string LocalName = "__fn_local_" + Name;
+ std::string LocalName = "$$__fn_local_" + Name;
Function *FStub = Function::Create
(F->getFunctionType(),
Function::InternalLinkage, StubName, M);
@@ -405,19 +437,36 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
IAH.Out(" .set macro");
if (PicMode) {
IAH.Out(".set noreorder");
- IAH.Out(".cpload $$2");
+ IAH.Out(".cpload $$25");
IAH.Out(".set reorder");
IAH.Out(".reloc 0,R_MIPS_NONE," + Name);
IAH.Out("la $$25," + LocalName);
}
- else
- IAH.Out("la $$25, " + Name);
+ else {
+ IAH.Out(".set reorder");
+ IAH.Out("la $$25," + Name);
+ }
swapFPIntParams(PV, M, IAH, LE, false);
IAH.Out("jr $$25");
IAH.Out(LocalName + " = " + Name);
new UnreachableInst(FStub->getContext(), BB);
}
+//
+// remove the use-soft-float attribute
+//
+static void removeUseSoftFloat(Function &F) {
+ AttributeSet A;
+ DEBUG(errs() << "removing -use-soft-float\n");
+ A = A.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
+ "use-soft-float", "false");
+ F.removeAttributes(AttributeSet::FunctionIndex, A);
+ if (F.hasFnAttribute("use-soft-float")) {
+ DEBUG(errs() << "still has -use-soft-float\n");
+ }
+ F.addAttributes(AttributeSet::FunctionIndex, A);
+}
+
namespace llvm {
//
@@ -441,6 +490,11 @@ bool Mips16HardFloat::runOnModule(Module &M) {
DEBUG(errs() << "Run on Module Mips16HardFloat\n");
bool Modified = false;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->hasFnAttribute("nomips16") &&
+ F->hasFnAttribute("use-soft-float")) {
+ removeUseSoftFloat(*F);
+ continue;
+ }
if (F->isDeclaration() || F->hasFnAttribute("mips16_fp_stub") ||
F->hasFnAttribute("nomips16")) continue;
Modified |= fixupFPReturnAndCall(*F, &M, Subtarget);
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 0caa277..4948f40 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -80,10 +80,11 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
V1 = RegInfo.createVirtualRegister(RC);
V2 = RegInfo.createVirtualRegister(RC);
- BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::GotPrologue16), V0).
+ addReg(V1, RegState::Define).
+ addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI).
+ addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+
BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
.addReg(V1).addReg(V2);
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 6ed1d9e..61d8bb8 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -90,6 +90,7 @@ static const Mips16Libcall HardFloatLibCalls[] = {
};
static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = {
+ {"__fixunsdfsi", "__mips16_call_stub_2" },
{"ceil", "__mips16_call_stub_df_2"},
{"ceilf", "__mips16_call_stub_sf_1"},
{"copysign", "__mips16_call_stub_df_10"},
@@ -144,6 +145,11 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
computeRegisterProperties();
}
@@ -168,57 +174,57 @@ Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::SelBneZ:
return emitSel16(Mips::BnezRxImm16, MI, BB);
case Mips::SelTBteqZCmpi:
- return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Bteqz16, Mips::CmpiRxImmX16, MI, BB);
case Mips::SelTBteqZSlti:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Bteqz16, Mips::SltiRxImmX16, MI, BB);
case Mips::SelTBteqZSltiu:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Bteqz16, Mips::SltiuRxImmX16, MI, BB);
case Mips::SelTBtneZCmpi:
- return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Btnez16, Mips::CmpiRxImmX16, MI, BB);
case Mips::SelTBtneZSlti:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Btnez16, Mips::SltiRxImmX16, MI, BB);
case Mips::SelTBtneZSltiu:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Btnez16, Mips::SltiuRxImmX16, MI, BB);
case Mips::SelTBteqZCmp:
- return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ return emitSelT16(Mips::Bteqz16, Mips::CmpRxRy16, MI, BB);
case Mips::SelTBteqZSlt:
- return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ return emitSelT16(Mips::Bteqz16, Mips::SltRxRy16, MI, BB);
case Mips::SelTBteqZSltu:
- return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ return emitSelT16(Mips::Bteqz16, Mips::SltuRxRy16, MI, BB);
case Mips::SelTBtneZCmp:
- return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ return emitSelT16(Mips::Btnez16, Mips::CmpRxRy16, MI, BB);
case Mips::SelTBtneZSlt:
- return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ return emitSelT16(Mips::Btnez16, Mips::SltRxRy16, MI, BB);
case Mips::SelTBtneZSltu:
- return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ return emitSelT16(Mips::Btnez16, Mips::SltuRxRy16, MI, BB);
case Mips::BteqzT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Bteqz16, Mips::CmpRxRy16, MI, BB);
case Mips::BteqzT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Bteqz16, Mips::SltRxRy16, MI, BB);
case Mips::BteqzT8SltuX16:
// TBD: figure out a way to get this or remove the instruction
// altogether.
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Bteqz16, Mips::SltuRxRy16, MI, BB);
case Mips::BtnezT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Btnez16, Mips::CmpRxRy16, MI, BB);
case Mips::BtnezT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Btnez16, Mips::SltRxRy16, MI, BB);
case Mips::BtnezT8SltuX16:
// TBD: figure out a way to get this or remove the instruction
// altogether.
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Btnez16, Mips::SltuRxRy16, MI, BB);
case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, false, MI, BB);
+ Mips::Bteqz16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, false, MI, BB);
case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, true, MI, BB);
+ Mips::Bteqz16, Mips::SltiRxImm16, Mips::SltiRxImmX16, true, MI, BB);
case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, false, MI, BB);
+ Mips::Bteqz16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, false, MI, BB);
case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, false, MI, BB);
+ Mips::Btnez16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, false, MI, BB);
case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, true, MI, BB);
+ Mips::Btnez16, Mips::SltiRxImm16, Mips::SltiRxImmX16, true, MI, BB);
case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, false, MI, BB);
+ Mips::Btnez16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, false, MI, BB);
break;
case Mips::SltCCRxRy16:
return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
@@ -418,6 +424,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
SelectionDAG &DAG = CLI.DAG;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
const char* Mips16HelperFunction = 0;
bool NeedMips16Helper = false;
@@ -473,7 +481,10 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
if (NeedMips16Helper) {
RegsToPass.push_front(std::make_pair(V0Reg, Callee));
JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
- JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget);
+ JumpTarget = getAddrGlobal(S, JumpTarget.getValueType(), DAG,
+ MipsII::MO_GOT, Chain,
+ FuncInfo->callPtrInfo(S->getSymbol()));
} else
RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 05e70ab..000ea28 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -10,7 +10,6 @@
// This file contains the Mips16 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
-#include <stdio.h>
#include "Mips16InstrInfo.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MipsMachineFunction.h"
@@ -20,10 +19,12 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cctype>
using namespace llvm;
@@ -36,7 +37,7 @@ static cl::opt<bool> NeverUseSaveRestore(
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
- : MipsInstrInfo(tm, Mips::BimmX16),
+ : MipsInstrInfo(tm, Mips::Bimm16),
RI(*tm.getSubtargetImpl()) {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
@@ -77,11 +78,11 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Mips::GPR32RegClass.contains(DestReg) &&
Mips::CPU16RegsRegClass.contains(SrcReg))
Opc = Mips::Move32R16;
- else if ((SrcReg == Mips::HI) &&
+ else if ((SrcReg == Mips::HI0) &&
(Mips::CPU16RegsRegClass.contains(DestReg)))
Opc = Mips::Mfhi16, SrcReg = 0;
- else if ((SrcReg == Mips::LO) &&
+ else if ((SrcReg == Mips::LO0) &&
(Mips::CPU16RegsRegClass.contains(DestReg)))
Opc = Mips::Mflo16, SrcReg = 0;
@@ -151,13 +152,17 @@ unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const {
default: llvm_unreachable("Illegal opcode!");
case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16;
case Mips::BnezRxImmX16: return Mips::BeqzRxImmX16;
+ case Mips::BeqzRxImm16: return Mips::BnezRxImm16;
+ case Mips::BnezRxImm16: return Mips::BeqzRxImm16;
case Mips::BteqzT8CmpX16: return Mips::BtnezT8CmpX16;
case Mips::BteqzT8SltX16: return Mips::BtnezT8SltX16;
case Mips::BteqzT8SltiX16: return Mips::BtnezT8SltiX16;
+ case Mips::Btnez16: return Mips::Bteqz16;
case Mips::BtnezX16: return Mips::BteqzX16;
case Mips::BtnezT8CmpiX16: return Mips::BteqzT8CmpiX16;
case Mips::BtnezT8SltuX16: return Mips::BteqzT8SltuX16;
case Mips::BtnezT8SltiuX16: return Mips::BteqzT8SltiuX16;
+ case Mips::Bteqz16: return Mips::Btnez16;
case Mips::BteqzX16: return Mips::BtnezX16;
case Mips::BteqzT8CmpiX16: return Mips::BtnezT8CmpiX16;
case Mips::BteqzT8SltuX16: return Mips::BtnezT8SltuX16;
@@ -439,6 +444,9 @@ Mips16InstrInfo::basicLoadImmediate(
unsigned Mips16InstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 ||
+ Opc == Mips::Bimm16 ||
+ Opc == Mips::Bteqz16 || Opc == Mips::Btnez16 ||
+ Opc == Mips::BeqzRxImm16 || Opc == Mips::BnezRxImm16 ||
Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 ||
Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 ||
Opc == Mips::BteqzT8SltX16 || Opc == Mips::BteqzT8SltuX16 ||
@@ -473,7 +481,6 @@ const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
return new Mips16InstrInfo(TM);
}
-#include <stdio.h>
bool Mips16InstrInfo::validImmediate(unsigned Opcode, unsigned Reg,
int64_t Amount) {
switch (Opcode) {
@@ -493,6 +500,49 @@ bool Mips16InstrInfo::validImmediate(unsigned Opcode, unsigned Reg,
return isInt<16>(Amount);
return isInt<15>(Amount);
}
- printf("Unexpected opcode %i \n", Opcode);
llvm_unreachable("unexpected Opcode in validImmediate");
}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overloaded in the target code to do that.
+/// We implement the special case of the .space directive taking only an
+/// integer argument, which is the size in bytes. This is used for creating
+/// inline code spacing for testing purposes using inline assembly.
+///
+unsigned Mips16InstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ if (strncmp(Str, ".space", 6)==0) {
+ char *EStr; int Sz;
+ Sz = strtol(Str+6, &EStr, 10);
+ while (isspace(*EStr)) ++EStr;
+ if (*EStr=='\0') {
+ DEBUG(dbgs() << "parsed .space " << Sz << '\n');
+ return Sz;
+ }
+ }
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
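For a sense of how the .space special case above gets exercised: each inline-asm blob is measured on its own, so a blob containing only ".space 64" is sized as exactly 64 bytes while an ordinary instruction is charged MAI.getMaxInstLength() bytes. A hypothetical test-style fragment (the function name and constants are illustrative only):

// GNU-style inline assembly; compiles with clang/gcc targeting MIPS.
extern "C" void padded_region() {
  asm volatile(".space 64"); // measured as exactly 64 bytes
  asm volatile("nop");       // measured as one maximum-length instruction
}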
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 118d258..d9a594b 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -108,6 +108,8 @@ public:
void BuildAddiuSpImm
(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
+ unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const;
private:
virtual unsigned getAnalyzableBrOpc(unsigned Opc) const;
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index aef4e92..7441c78 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -32,6 +32,16 @@ def mem16_ea : Operand<i32> {
}
//
+// I-type instruction format
+//
+// This is only used by bimm. The actual assembly value is a 12-bit signed
+// number.
+//
+class FI16_ins<bits<5> op, string asmstr, InstrItinClass itin>:
+ FI16<op, (outs), (ins brtarget:$imm16),
+ !strconcat(asmstr, "\t$imm16 # 16 bit inst"), [], itin>;
+
+//
//
// I8 instruction format
//
@@ -41,7 +51,10 @@ class FI816_ins_base<bits<3> _func, string asmstr,
FI816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
[], itin>;
-
+class FI816_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FI816_ins_base<_func, asmstr, "\t$imm # 16 bit inst", itin>;
+
class FI816_SP_ins<bits<3> _func, string asmstr,
InstrItinClass itin>:
FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>;
@@ -60,6 +73,11 @@ class FRI16_ins<bits<5> op, string asmstr,
InstrItinClass itin>:
FRI16_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+class FRI16_TCP_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm, i32imm:$size),
+ !strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin>;
+
class FRI16R_ins_base<bits<5> op, string asmstr, string asmstr2,
InstrItinClass itin>:
FRI16<op, (outs), (ins CPU16Regs:$rx, simm16:$imm),
@@ -172,6 +190,11 @@ class FEXT_RI16_B_ins<bits<5> _op, string asmstr,
FEXT_RI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
!strconcat(asmstr, "\t$rx, $imm"), [], itin>;
+class FEXT_RI16_TCP_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm, i32imm:$size),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin>;
+
class FEXT_2RI16_ins<bits<5> _op, string asmstr,
InstrItinClass itin>:
FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
@@ -220,7 +243,7 @@ class FEXT_RRI_A16_mem_ins<bits<1> op, string asmstr, Operand MemOpnd,
// EXT-SHIFT instruction format
//
class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
- FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
+ FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, uimm5:$sa),
!strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
//
@@ -343,6 +366,14 @@ class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra,
FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
!strconcat(asmstr, "\t $rx"), [], itin> ;
+class FRR_SF16_ins
+ <bits<5> _funct, bits<3> _subfunc,
+ string asmstr, InstrItinClass itin>:
+ FRR_SF16<_funct, _subfunc, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_),
+ !strconcat(asmstr, "\t $rx"),
+ [], itin> {
+ let Constraints = "$rx_ = $rx";
+ }
//
// RRR-type instruction format
//
@@ -447,7 +478,7 @@ def Constant32:
MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>;
def LwConstant32:
- MipsPseudo16<(outs CPU16Regs:$rx), (ins imm32:$imm),
+ MipsPseudo16<(outs CPU16Regs:$rx), (ins imm32:$imm, imm32:$constid),
"lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
@@ -559,6 +590,14 @@ def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
//
def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+//
+// Format: B offset MIPS16e
+// Purpose: Unconditional Branch (Extended)
+// To do an unconditional PC-relative branch.
+//
+
+def Bimm16: FI16_ins<0b00010, "b", IIAlu>, branch16;
+
// Format: B offset MIPS16e
// Purpose: Unconditional Branch
// To do an unconditional PC-relative branch.
@@ -591,6 +630,10 @@ def Break16: FRRBreakNull16_ins<"break 0", NoItinerary>;
// Purpose: Branch on T Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
+def Bteqz16: FI816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
+
def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
let Uses = [T8];
}
@@ -614,6 +657,11 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">,
// Purpose: Branch on T Not Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
+
+def Btnez16: FI816_ins<0b001, "btnez", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
+
def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 {
let Uses = [T8];
}
@@ -665,7 +713,7 @@ def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
// To divide 32-bit signed integers.
//
def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
@@ -674,7 +722,7 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
// To divide 32-bit unsigned integers.
//
def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
// Format: JAL target MIPS16e
@@ -684,10 +732,7 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
//
def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> {
- let isBranch = 1;
let hasDelaySlot = 0; // not true, but we add the nop for now
- let isTerminator=1;
- let isBarrier=1;
let isCall=1;
}
@@ -771,6 +816,10 @@ def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>;
//
def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
+def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIAlu> {
+ let isCodeGenOnly = 1;
+}
+
//
// Format: LW ry, offset(rx) MIPS16e
// Purpose: Load Word (Extended)
@@ -784,10 +833,13 @@ def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad{
// Purpose: Load Word (SP-Relative, Extended)
// To load an SP-relative word from memory as a signed value.
//
-def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad{
+def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10010, "lw", IILoad>, MayLoad{
let Uses = [SP];
}
+def LwRxPcTcp16: FRI16_TCP_ins<0b10110, "lw", IILoad>, MayLoad;
+
+def LwRxPcTcpX16: FEXT_RI16_TCP_ins<0b10110, "lw", IILoad>, MayLoad;
//
// Format: MOVE r32, rz MIPS16e
// Purpose: Move
@@ -808,7 +860,7 @@ def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>;
// To copy the special purpose HI register to a GPR.
//
def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
- let Uses = [HI];
+ let Uses = [HI0];
let neverHasSideEffects = 1;
}
@@ -818,7 +870,7 @@ def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
// To copy the special purpose LO register to a GPR.
//
def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
- let Uses = [LO];
+ let Uses = [LO0];
let neverHasSideEffects = 1;
}
@@ -828,13 +880,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
def MultRxRy16: FMULT16_ins<"mult", IIAlu> {
let isCommutable = 1;
let neverHasSideEffects = 1;
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
let isCommutable = 1;
let neverHasSideEffects = 1;
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
@@ -845,7 +897,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
let isCommutable = 1;
let neverHasSideEffects = 1;
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
@@ -856,7 +908,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
let isCommutable = 1;
let neverHasSideEffects = 1;
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
@@ -951,6 +1003,22 @@ def SbRxRyOffMemX16:
FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore;
//
+// Format: SEB rx MIPS16e
+// Purpose: Sign-Extend Byte
+// Sign-extend least significant byte in register rx.
+//
+def SebRx16
+ : FRR_SF16_ins<0b10001, 0b100, "seb", IIAlu>;
+
+//
+// Format: SEH rx MIPS16e
+// Purpose: Sign-Extend Halfword
+// Sign-extend least significant word in register rx.
+//
+def SehRx16
+ : FRR_SF16_ins<0b10001, 0b101, "seh", IIAlu>;
+
+//
// The Sel(T) instructions are pseudos
// T means that they use T8 implicitly.
//
@@ -1075,7 +1143,7 @@ def ShRxRyOffMemX16:
//
// Format: SLL rx, ry, sa MIPS16e
// Purpose: Shift Word Left Logical (Extended)
-// To execute a left-shift of a word by a fixed number of bits—0 to 31 bits.
+// To execute a left-shift of a word by a fixed number of bits-0 to 31 bits.
//
def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
@@ -1171,7 +1239,7 @@ def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
// Format: SRA rx, ry, sa MIPS16e
// Purpose: Shift Word Right Arithmetic (Extended)
// To execute an arithmetic right-shift of a word by a fixed
-// number of bits—1 to 8 bits.
+// number of bits-1 to 8 bits.
//
def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
@@ -1189,7 +1257,7 @@ def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
// Format: SRL rx, ry, sa MIPS16e
// Purpose: Shift Word Right Logical (Extended)
// To execute a logical right-shift of a word by a fixed
-// number of bits—1 to 31 bits.
+// number of bits-1 to 31 bits.
//
def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
@@ -1330,9 +1398,7 @@ def: Mips16Pat<(i32 addr16:$addr),
// Large (>16 bit) immediate loads
-def : Mips16Pat<(i32 imm:$imm),
- (OrRxRxRy16 (SllX16 (LiRxImmX16 (HI16 imm:$imm)), 16),
- (LiRxImmX16 (LO16 imm:$imm)))>;
+def : Mips16Pat<(i32 imm:$imm), (LwConstant32 imm:$imm, -1)>;
// Carry MipsPatterns
def : Mips16Pat<(subc CPU16Regs:$lhs, CPU16Regs:$rhs),
@@ -1373,7 +1439,7 @@ def: Mips16Pat
def: Mips16Pat
<(brcond (i32 (seteq CPU16Regs:$rx, 0)), bb:$targ16),
- (BeqzRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ (BeqzRxImm16 CPU16Regs:$rx, bb:$targ16)
>;
//
@@ -1435,7 +1501,7 @@ def: Mips16Pat
def: Mips16Pat
<(brcond (i32 (setne CPU16Regs:$rx, 0)), bb:$targ16),
- (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ (BnezRxImm16 CPU16Regs:$rx, bb:$targ16)
>;
//
@@ -1443,7 +1509,7 @@ def: Mips16Pat
//
def: Mips16Pat
<(brcond CPU16Regs:$rx, bb:$targ16),
- (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ (BnezRxImm16 CPU16Regs:$rx, bb:$targ16)
>;
//
@@ -1473,7 +1539,7 @@ def: Mips16Pat
// (BtnezT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
// >;
-def: UncondBranch16_pat<br, BimmX16>;
+def: UncondBranch16_pat<br, Bimm16>;
// Small immediates
def: Mips16Pat<(i32 immSExt16:$in),
@@ -1787,7 +1853,8 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
(AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
// hi/lo relocs
-
+def : Mips16Pat<(MipsHi tblockaddress:$in),
+ (SllX16 (LiRxImmX16 tblockaddress:$in), 16)>;
def : Mips16Pat<(MipsHi tglobaladdr:$in),
(SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>;
def : Mips16Pat<(MipsHi tjumptable:$in),
@@ -1795,6 +1862,8 @@ def : Mips16Pat<(MipsHi tjumptable:$in),
def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
(SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>;
+def : Mips16Pat<(MipsLo tblockaddress:$in), (LiRxImmX16 tblockaddress:$in)>;
+
// wrapper_pic
class Wrapper16Pat<SDNode node, Instruction ADDiuOp, RegisterClass RC>:
Mips16Pat<(MipsWrapper RC:$gp, node:$in),
@@ -1811,3 +1880,30 @@ def : Mips16Pat<(i32 (extloadi16 addr16:$src)),
def: Mips16Pat<(trap), (Break16)>;
+def : Mips16Pat<(sext_inreg CPU16Regs:$val, i8),
+ (SebRx16 CPU16Regs:$val)>;
+
+def : Mips16Pat<(sext_inreg CPU16Regs:$val, i16),
+ (SehRx16 CPU16Regs:$val)>;
+
+def GotPrologue16:
+ MipsPseudo16<
+ (outs CPU16Regs:$rh, CPU16Regs:$rl),
+ (ins simm16:$immHi, simm16:$immLo),
+ ".align 2\n\tli\t$rh, $immHi\n\taddiu\t$rl, $$pc, $immLo\n ",[]> ;
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ // let PrintMethod = "printCPInstOperand";
+}
+
+// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+// the function. The first operand is the ID# for this instruction, the second
+// is the index into the MachineConstantPool that this is, the third is the
+// size in bytes of this constant pool entry.
+//
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+MipsPseudo16<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), "foo", []>;
+
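For orientation, the three operands declared above are consumed later in this patch by MipsAsmPrinter::EmitInstruction: operand 0 is the label ID, operand 1 the MachineConstantPool index, operand 2 the size in bytes. A standalone sketch of that decoding, using a made-up struct rather than the real MachineInstr API:

#include <cstdio>

// Hypothetical stand-in for a CONSTPOOL_ENTRY machine instruction; the real
// code reads the same three fields via MI->getOperand(0..2).
struct ConstPoolEntryMI {
  unsigned LabelId;  // operand 0: ID# used to form the $CPI label
  unsigned CPIdx;    // operand 1: index into the MachineConstantPool
  unsigned Size;     // operand 2: size of this entry in bytes
};

static void emitEntry(const ConstPoolEntryMI &MI) {
  std::printf("$CPI_%u: pool entry %u, %u bytes\n", MI.LabelId, MI.CPIdx, MI.Size);
}

int main() {
  ConstPoolEntryMI MI = {0, 0, 4};  // e.g. a 4-byte f32 constant
  emitEntry(MI);
  return 0;
}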
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index a752ab8..15ef654 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -15,9 +15,6 @@
// Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
-// Instruction operand types
-def shamt_64 : Operand<i64>;
-
// Unsigned Operand
def uimm16_64 : Operand<i64> {
let PrintMethod = "printUnsignedImm";
@@ -34,36 +31,21 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-let DecoderNamespace = "Mips64" in {
-
-multiclass Atomic2Ops64<PatFrag Op> {
- def NAME : Atomic2Ops<Op, GPR64, GPR32>, Requires<[NotN64, HasStdEnc]>;
- def _P8 : Atomic2Ops<Op, GPR64, GPR64>, Requires<[IsN64, HasStdEnc]>;
-}
-
-multiclass AtomicCmpSwap64<PatFrag Op> {
- def NAME : AtomicCmpSwap<Op, GPR64, GPR32>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : AtomicCmpSwap<Op, GPR64, GPR64>,
- Requires<[IsN64, HasStdEnc]>;
-}
-}
-let usesCustomInserter = 1, Predicates = [HasStdEnc],
- DecoderNamespace = "Mips64" in {
- defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64>;
- defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64>;
- defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64>;
- defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64>;
- defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64>;
- defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64>;
- defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64>;
- defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>;
+let usesCustomInserter = 1 in {
+ def ATOMIC_LOAD_ADD_I64 : Atomic2Ops<atomic_load_add_64, GPR64>;
+ def ATOMIC_LOAD_SUB_I64 : Atomic2Ops<atomic_load_sub_64, GPR64>;
+ def ATOMIC_LOAD_AND_I64 : Atomic2Ops<atomic_load_and_64, GPR64>;
+ def ATOMIC_LOAD_OR_I64 : Atomic2Ops<atomic_load_or_64, GPR64>;
+ def ATOMIC_LOAD_XOR_I64 : Atomic2Ops<atomic_load_xor_64, GPR64>;
+ def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
+ def ATOMIC_SWAP_I64 : Atomic2Ops<atomic_swap_64, GPR64>;
+ def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
}
/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1 in {
- defm LOAD_AC128 : LoadM<"", ACRegs128>;
- defm STORE_AC128 : StoreM<"", ACRegs128>;
+ def LOAD_ACC128 : Load<"", ACC128>;
+ def STORE_ACC128 : Store<"", ACC128>;
}
//===----------------------------------------------------------------------===//
@@ -110,103 +92,101 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>;
}
/// Shift Instructions
-def DSLL : shift_rotate_imm<"dsll", shamt, GPR64Opnd, shl, immZExt6>,
+def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, shl, immZExt6>,
SRA_FM<0x38, 0>;
-def DSRL : shift_rotate_imm<"dsrl", shamt, GPR64Opnd, srl, immZExt6>,
+def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, srl, immZExt6>,
SRA_FM<0x3a, 0>;
-def DSRA : shift_rotate_imm<"dsra", shamt, GPR64Opnd, sra, immZExt6>,
+def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, sra, immZExt6>,
SRA_FM<0x3b, 0>;
def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, shl>, SRLV_FM<0x14, 0>;
def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, srl>, SRLV_FM<0x16, 0>;
def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, sra>, SRLV_FM<0x17, 0>;
-def DSLL32 : shift_rotate_imm<"dsll32", shamt, GPR64Opnd>, SRA_FM<0x3c, 0>;
-def DSRL32 : shift_rotate_imm<"dsrl32", shamt, GPR64Opnd>, SRA_FM<0x3e, 0>;
-def DSRA32 : shift_rotate_imm<"dsra32", shamt, GPR64Opnd>, SRA_FM<0x3f, 0>;
+def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd>, SRA_FM<0x3c, 0>;
+def DSRL32 : shift_rotate_imm<"dsrl32", uimm5, GPR64Opnd>, SRA_FM<0x3e, 0>;
+def DSRA32 : shift_rotate_imm<"dsra32", uimm5, GPR64Opnd>, SRA_FM<0x3f, 0>;
// Rotate Instructions
let Predicates = [HasMips64r2, HasStdEnc] in {
- def DROTR : shift_rotate_imm<"drotr", shamt, GPR64Opnd, rotr, immZExt6>,
+ def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, rotr, immZExt6>,
SRA_FM<0x3a, 1>;
def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, rotr>,
SRLV_FM<0x16, 1>;
+ def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd>, SRA_FM<0x3e, 1>;
}
/// Load and Store Instructions
/// aligned
let isCodeGenOnly = 1 in {
-defm LB64 : LoadM<"lb", GPR64Opnd, sextloadi8, IILoad>, LW_FM<0x20>;
-defm LBu64 : LoadM<"lbu", GPR64Opnd, zextloadi8, IILoad>, LW_FM<0x24>;
-defm LH64 : LoadM<"lh", GPR64Opnd, sextloadi16, IILoad>, LW_FM<0x21>;
-defm LHu64 : LoadM<"lhu", GPR64Opnd, zextloadi16, IILoad>, LW_FM<0x25>;
-defm LW64 : LoadM<"lw", GPR64Opnd, sextloadi32, IILoad>, LW_FM<0x23>;
-defm SB64 : StoreM<"sb", GPR64Opnd, truncstorei8, IIStore>, LW_FM<0x28>;
-defm SH64 : StoreM<"sh", GPR64Opnd, truncstorei16, IIStore>, LW_FM<0x29>;
-defm SW64 : StoreM<"sw", GPR64Opnd, truncstorei32, IIStore>, LW_FM<0x2b>;
+def LB64 : Load<"lb", GPR64Opnd, sextloadi8, IILoad>, LW_FM<0x20>;
+def LBu64 : Load<"lbu", GPR64Opnd, zextloadi8, IILoad>, LW_FM<0x24>;
+def LH64 : Load<"lh", GPR64Opnd, sextloadi16, IILoad>, LW_FM<0x21>;
+def LHu64 : Load<"lhu", GPR64Opnd, zextloadi16, IILoad>, LW_FM<0x25>;
+def LW64 : Load<"lw", GPR64Opnd, sextloadi32, IILoad>, LW_FM<0x23>;
+def SB64 : Store<"sb", GPR64Opnd, truncstorei8, IIStore>, LW_FM<0x28>;
+def SH64 : Store<"sh", GPR64Opnd, truncstorei16, IIStore>, LW_FM<0x29>;
+def SW64 : Store<"sw", GPR64Opnd, truncstorei32, IIStore>, LW_FM<0x2b>;
}
-defm LWu : LoadM<"lwu", GPR64Opnd, zextloadi32, IILoad>, LW_FM<0x27>;
-defm LD : LoadM<"ld", GPR64Opnd, load, IILoad>, LW_FM<0x37>;
-defm SD : StoreM<"sd", GPR64Opnd, store, IIStore>, LW_FM<0x3f>;
+def LWu : Load<"lwu", GPR64Opnd, zextloadi32, IILoad>, LW_FM<0x27>;
+def LD : Load<"ld", GPR64Opnd, load, IILoad>, LW_FM<0x37>;
+def SD : Store<"sd", GPR64Opnd, store, IIStore>, LW_FM<0x3f>;
/// load/store left/right
let isCodeGenOnly = 1 in {
-defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, GPR64Opnd>, LW_FM<0x22>;
-defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, GPR64Opnd>, LW_FM<0x26>;
-defm SWL64 : StoreLeftRightM<"swl", MipsSWL, GPR64Opnd>, LW_FM<0x2a>;
-defm SWR64 : StoreLeftRightM<"swr", MipsSWR, GPR64Opnd>, LW_FM<0x2e>;
+def LWL64 : LoadLeftRight<"lwl", MipsLWL, GPR64Opnd, IILoad>, LW_FM<0x22>;
+def LWR64 : LoadLeftRight<"lwr", MipsLWR, GPR64Opnd, IILoad>, LW_FM<0x26>;
+def SWL64 : StoreLeftRight<"swl", MipsSWL, GPR64Opnd, IIStore>, LW_FM<0x2a>;
+def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd, IIStore>, LW_FM<0x2e>;
}
-defm LDL : LoadLeftRightM<"ldl", MipsLDL, GPR64Opnd>, LW_FM<0x1a>;
-defm LDR : LoadLeftRightM<"ldr", MipsLDR, GPR64Opnd>, LW_FM<0x1b>;
-defm SDL : StoreLeftRightM<"sdl", MipsSDL, GPR64Opnd>, LW_FM<0x2c>;
-defm SDR : StoreLeftRightM<"sdr", MipsSDR, GPR64Opnd>, LW_FM<0x2d>;
+def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, IILoad>, LW_FM<0x1a>;
+def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, IILoad>, LW_FM<0x1b>;
+def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, IIStore>, LW_FM<0x2c>;
+def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, IIStore>, LW_FM<0x2d>;
/// Load-linked, Store-conditional
-let Predicates = [NotN64, HasStdEnc] in {
- def LLD : LLBase<"lld", GPR64Opnd, mem>, LW_FM<0x34>;
- def SCD : SCBase<"scd", GPR64Opnd, mem>, LW_FM<0x3c>;
-}
-
-let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in {
- def LLD_P8 : LLBase<"lld", GPR64Opnd, mem64>, LW_FM<0x34>;
- def SCD_P8 : SCBase<"scd", GPR64Opnd, mem64>, LW_FM<0x3c>;
-}
+def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>;
+def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>;
/// Jump and Branch Instructions
let isCodeGenOnly = 1 in {
-def JR64 : IndirectBranch<GPR64Opnd>, MTLO_FM<8>;
-def BEQ64 : CBranch<"beq", seteq, GPR64Opnd>, BEQ_FM<4>;
-def BNE64 : CBranch<"bne", setne, GPR64Opnd>, BEQ_FM<5>;
-def BGEZ64 : CBranchZero<"bgez", setge, GPR64Opnd>, BGEZ_FM<1, 1>;
-def BGTZ64 : CBranchZero<"bgtz", setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
-def BLEZ64 : CBranchZero<"blez", setle, GPR64Opnd>, BGEZ_FM<6, 0>;
-def BLTZ64 : CBranchZero<"bltz", setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
+def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>;
+def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>;
+def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>;
+def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>;
+def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
+def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
+def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
-def TAILCALL64_R : JumpFR<GPR64Opnd, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+def TAILCALL64_R : JumpFR<"tcallr", GPR64Opnd, MipsTailCall>,
+ MTLO_FM<8>, IsTailCall;
}
/// Multiply and Divide Instructions.
-def DMULT : Mult<"dmult", IIImult, GPR64Opnd, [HI64, LO64]>,
+def DMULT : Mult<"dmult", IIImult, GPR64Opnd, [HI0_64, LO0_64]>,
MULT_FM<0, 0x1c>;
-def DMULTu : Mult<"dmultu", IIImult, GPR64Opnd, [HI64, LO64]>,
+def DMULTu : Mult<"dmultu", IIImult, GPR64Opnd, [HI0_64, LO0_64]>,
MULT_FM<0, 0x1d>;
-def PseudoDMULT : MultDivPseudo<DMULT, ACRegs128, GPR64Opnd, MipsMult,
+def PseudoDMULT : MultDivPseudo<DMULT, ACC128, GPR64Opnd, MipsMult,
IIImult>;
-def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, GPR64Opnd, MipsMultu,
+def PseudoDMULTu : MultDivPseudo<DMULTu, ACC128, GPR64Opnd, MipsMultu,
IIImult>;
-def DSDIV : Div<"ddiv", IIIdiv, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1e>;
-def DUDIV : Div<"ddivu", IIIdiv, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1f>;
-def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, GPR64Opnd, MipsDivRem,
+def DSDIV : Div<"ddiv", IIIdiv, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1e>;
+def DUDIV : Div<"ddivu", IIIdiv, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1f>;
+def PseudoDSDIV : MultDivPseudo<DSDIV, ACC128, GPR64Opnd, MipsDivRem,
IIIdiv, 0, 1, 1>;
-def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, GPR64Opnd, MipsDivRemU,
+def PseudoDUDIV : MultDivPseudo<DUDIV, ACC128, GPR64Opnd, MipsDivRemU,
IIIdiv, 0, 1, 1>;
let isCodeGenOnly = 1 in {
-def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI64]>, MTLO_FM<0x11>;
-def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO64]>, MTLO_FM<0x13>;
-def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, [HI64]>, MFLO_FM<0x10>;
-def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, [LO64]>, MFLO_FM<0x12>;
+def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>;
+def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>;
+def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>;
+def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>;
+def PseudoMFHI64 : PseudoMFLOHI<GPR64, ACC128, MipsMFHI>;
+def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>;
+def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>;
/// Sign Ext In Register Instructions.
def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd>, SEB_FM<0x10, 0x20>;
@@ -221,21 +201,18 @@ def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>;
def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>;
def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>;
-def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd, mem_ea_64>, LW_FM<0x19>;
+def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>;
let isCodeGenOnly = 1 in
-def RDHWR64 : ReadHardware<GPR64Opnd, HW64RegsOpnd>, RDHWR_FM;
+def RDHWR64 : ReadHardware<GPR64Opnd, HWRegsOpnd>, RDHWR_FM;
-def DEXT : ExtBase<"dext", GPR64Opnd>, EXT_FM<3>;
-let Pattern = []<dag> in {
- def DEXTU : ExtBase<"dextu", GPR64Opnd>, EXT_FM<2>;
- def DEXTM : ExtBase<"dextm", GPR64Opnd>, EXT_FM<1>;
-}
-def DINS : InsBase<"dins", GPR64Opnd>, EXT_FM<7>;
-let Pattern = []<dag> in {
- def DINSU : InsBase<"dinsu", GPR64Opnd>, EXT_FM<6>;
- def DINSM : InsBase<"dinsm", GPR64Opnd>, EXT_FM<5>;
-}
+def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>;
+def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>;
+def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>;
+
+def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>;
+def DINSU : InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>;
+def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>;
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
def DSLL64_32 : FR<0x00, 0x3c, (outs GPR64:$rd), (ins GPR32:$rt),
@@ -251,18 +228,12 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
//===----------------------------------------------------------------------===//
// extended loads
-let Predicates = [NotN64, HasStdEnc] in {
+let Predicates = [HasStdEnc] in {
def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
}
-let Predicates = [IsN64, HasStdEnc] in {
- def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
- def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
- def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64_P8 addr:$src)>;
- def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64_P8 addr:$src)>;
-}
// hi/lo relocs
def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
@@ -316,7 +287,7 @@ defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;
// truncate
def : MipsPat<(i32 (trunc GPR64:$src)),
(SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>,
- Requires<[IsN64, HasStdEnc]>;
+ Requires<[HasStdEnc]>;
// 32-to-64-bit extension
def : MipsPat<(i64 (anyext GPR32:$src)), (SLL64_32 GPR32:$src)>;
@@ -330,10 +301,6 @@ def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)),
// bswap MipsPattern
def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>;
-// mflo/hi patterns.
-def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)),
- (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>;
-
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
@@ -348,28 +315,16 @@ def : InstAlias<"dadd $rs, $rt, $imm",
0>;
/// Move between CPU and coprocessor registers
-let DecoderNamespace = "Mips64" in {
-def DMFC0_3OP64 : MFC3OP<(outs GPR64Opnd:$rt),
- (ins GPR64Opnd:$rd, uimm16:$sel),
- "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>;
-def DMTC0_3OP64 : MFC3OP<(outs GPR64Opnd:$rd, uimm16:$sel),
- (ins GPR64Opnd:$rt),
- "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>;
-def DMFC2_3OP64 : MFC3OP<(outs GPR64Opnd:$rt),
- (ins GPR64Opnd:$rd, uimm16:$sel),
- "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>;
-def DMTC2_3OP64 : MFC3OP<(outs GPR64Opnd:$rd, uimm16:$sel),
- (ins GPR64Opnd:$rt),
- "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>;
+let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
+def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
+def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>;
+def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>;
+def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>;
}
// Two operand (implicit 0 selector) versions:
-def : InstAlias<"dmfc0 $rt, $rd",
- (DMFC0_3OP64 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : InstAlias<"dmtc0 $rt, $rd",
- (DMTC0_3OP64 GPR64Opnd:$rd, 0, GPR64Opnd:$rt), 0>;
-def : InstAlias<"dmfc2 $rt, $rd",
- (DMFC2_3OP64 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : InstAlias<"dmtc2 $rt, $rd",
- (DMTC2_3OP64 GPR64Opnd:$rd, 0, GPR64Opnd:$rt), 0>;
+def : InstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : InstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : InstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : InstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index a094dda..cc09034 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -22,7 +22,7 @@ namespace llvm {
};
typedef SmallVector<Inst, 7 > InstSeq;
- /// Analyze - Get an instrucion sequence to load immediate Imm. The last
+ /// Analyze - Get an instruction sequence to load immediate Imm. The last
/// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
/// true;
const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu);
@@ -32,19 +32,19 @@ namespace llvm {
/// AddInstr - Add I to all instruction sequences in SeqLs.
void AddInstr(InstSeqLs &SeqLs, const Inst &I);
- /// GetInstSeqLsADDiu - Get instrucion sequences which end with an ADDiu to
+ /// GetInstSeqLsADDiu - Get instruction sequences which end with an ADDiu to
/// load immediate Imm
void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
- /// GetInstSeqLsORi - Get instrucion sequences which end with an ORi to
+ /// GetInstSeqLsORi - Get instruction sequences which end with an ORi to
/// load immediate Imm
void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
- /// GetInstSeqLsSLL - Get instrucion sequences which end with a SLL to
+ /// GetInstSeqLsSLL - Get instruction sequences which end with a SLL to
/// load immediate Imm
void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
- /// GetInstSeqLs - Get instrucion sequences to load immediate Imm.
+ /// GetInstSeqLs - Get instruction sequences to load immediate Imm.
void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
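These helpers build sequences such as LUi+ORi (or ADDiu+SLL+ORi) by splitting an immediate into 16-bit halves. A small sketch of the underlying arithmetic, with hypothetical hi16/lo16 helper names:

#include <cassert>
#include <cstdint>

// Split a 32-bit immediate the way a "load upper" + "or lower" pair would.
static uint32_t hi16(uint32_t Imm) { return Imm >> 16; }
static uint32_t lo16(uint32_t Imm) { return Imm & 0xffff; }

int main() {
  uint32_t Imm = 0x12345678u;
  // lui-style materialization: place hi16 in the upper half, OR in lo16.
  uint32_t Materialized = (hi16(Imm) << 16) | lo16(Imm);
  assert(Materialized == Imm);
  return 0;
}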
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1dc3326..45c4398 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -15,11 +15,11 @@
#define DEBUG_TYPE "mips-asm-printer"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
-#include "MCTargetDesc/MipsELFStreamer.h"
#include "Mips.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
#include "MipsMCInstLower.h"
+#include "MipsTargetStreamer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -33,8 +33,8 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/TargetRegistry.h"
@@ -45,12 +45,17 @@
using namespace llvm;
+MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() {
+ return static_cast<MipsTargetStreamer &>(OutStreamer.getTargetStreamer());
+}
+
bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Initialize TargetLoweringObjectFile.
if (Subtarget->allowMixed16_32())
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MCP = MF.getConstantPool();
AsmPrinter::runOnMachineFunction(MF);
return true;
}
@@ -71,6 +76,39 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ // If we just ended a constant pool, mark it as such.
+ if (InConstantPool && MI->getOpcode() != Mips::CONSTPOOL_ENTRY) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ InConstantPool = false;
+ }
+ if (MI->getOpcode() == Mips::CONSTPOOL_ENTRY) {
+ // CONSTPOOL_ENTRY - This instruction represents a floating
+ // constant pool in the function. The first operand is the ID#
+ // for this instruction, the second is the index into the
+ // MachineConstantPool that this is, the third is the size in
+ // bytes of this constant pool entry.
+ // The required alignment is specified on the basic block holding this MI.
+ //
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ // If this is the first entry of the pool, mark it.
+ if (!InConstantPool) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegion);
+ InConstantPool = true;
+ }
+
+ OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+ return;
+ }
+
+
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -238,16 +276,15 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
}
if (Subtarget->inMicroMipsMode())
- if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
- MES->emitMipsSTOCG(*Subtarget, CurrentFnSym,
- (unsigned)ELF::STO_MIPS_MICROMIPS);
+ getTargetStreamer().emitMipsHackSTOCG(CurrentFnSym,
+ (unsigned)ELF::STO_MIPS_MICROMIPS);
OutStreamer.EmitLabel(CurrentFnSym);
}
/// EmitFunctionBodyStart - Targets can override this to emit stuff before
/// the first basic block in the function.
void MipsAsmPrinter::EmitFunctionBodyStart() {
- MCInstLowering.Initialize(Mang, &MF->getContext());
+ MCInstLowering.Initialize(&MF->getContext());
bool IsNakedFunction =
MF->getFunction()->
@@ -284,6 +321,12 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
}
OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
}
+ // Make sure to terminate any constant pools that were at the end
+ // of the function.
+ if (!InConstantPool)
+ return;
+ InConstantPool = false;
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
/// isBlockOnlyReachableByFallthough - Return true if the basic block has
@@ -418,6 +461,11 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
}
+ case 'w':
+ // Print MSA registers for the 'f' constraint
+ // In LLVM, the 'w' modifier doesn't need to do anything.
+ // We can just call printOperand as normal.
+ break;
}
}
@@ -485,7 +533,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
return;
case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
break;
case MachineOperand::MO_BlockAddress: {
@@ -526,6 +574,15 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
printOperand(MI, opNum, O);
}
+void MipsAsmPrinter::printUnsignedImm8(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)(unsigned char)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
void MipsAsmPrinter::
printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
@@ -587,15 +644,54 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
-void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
+static void emitELFHeaderFlagsCG(MipsTargetStreamer &TargetStreamer,
+ const MipsSubtarget &Subtarget) {
+ // Update e_header flags
+ unsigned EFlags = 0;
- if (OutStreamer.hasRawTextSupport()) return;
+ // TODO: Need to add -mabicalls and -mno-abicalls flags.
+ // Currently we assume that -mabicalls is the default.
+ EFlags |= ELF::EF_MIPS_CPIC;
+ if (Subtarget.inMips16Mode())
+ EFlags |= ELF::EF_MIPS_ARCH_ASE_M16;
+ else
+ EFlags |= ELF::EF_MIPS_NOREORDER;
+
+ // Architecture
+ if (Subtarget.hasMips64r2())
+ EFlags |= ELF::EF_MIPS_ARCH_64R2;
+ else if (Subtarget.hasMips64())
+ EFlags |= ELF::EF_MIPS_ARCH_64;
+ else if (Subtarget.hasMips32r2())
+ EFlags |= ELF::EF_MIPS_ARCH_32R2;
+ else
+ EFlags |= ELF::EF_MIPS_ARCH_32;
+
+ if (Subtarget.inMicroMipsMode())
+ EFlags |= ELF::EF_MIPS_MICROMIPS;
+
+ // ABI
+ if (Subtarget.isABI_O32())
+ EFlags |= ELF::EF_MIPS_ABI_O32;
+
+ // Relocation Model
+ Reloc::Model RM = Subtarget.getRelocationModel();
+ if (RM == Reloc::PIC_ || RM == Reloc::Default)
+ EFlags |= ELF::EF_MIPS_PIC;
+ else if (RM == Reloc::Static)
+ ; // Do nothing for Reloc::Static
+ else
+ llvm_unreachable("Unsupported relocation model for e_flags");
+
+ TargetStreamer.emitMipsHackELFFlags(EFlags);
+}
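To trace the logic above for one concrete configuration (MIPS32r2, O32 ABI, PIC, neither MIPS16 nor microMIPS), here is a standalone sketch; the bit values are placeholders, not the real ELF e_flags encodings:

#include <cstdio>

// Placeholder flag bits for illustration only (not the llvm::ELF values).
enum : unsigned {
  CPIC = 1u << 0, NOREORDER = 1u << 1, ARCH_32R2 = 1u << 2,
  ABI_O32 = 1u << 3, PIC = 1u << 4
};

int main() {
  bool InMips16 = false, HasMips32r2 = true, IsO32 = true, IsPIC = true;
  unsigned EFlags = CPIC;                 // -mabicalls assumed the default
  if (!InMips16) EFlags |= NOREORDER;     // MIPS16 would set ARCH_ASE_M16 instead
  if (HasMips32r2) EFlags |= ARCH_32R2;
  if (IsO32) EFlags |= ABI_O32;
  if (IsPIC) EFlags |= PIC;
  std::printf("e_flags = %#x\n", EFlags); // CPIC|NOREORDER|ARCH_32R2|ABI_O32|PIC
  return 0;
}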
+
+void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
// Emit Mips ELF register info
Subtarget->getMReginfo().emitMipsReginfoSectionCG(
OutStreamer, getObjFileLowering(), *Subtarget);
- if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
- MES->emitELFHeaderFlagsCG(*Subtarget);
+ emitELFHeaderFlagsCG(getTargetStreamer(), *Subtarget);
}
void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 4d1d624..11c6acd 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -25,10 +25,12 @@ namespace llvm {
class MCStreamer;
class MachineInstr;
class MachineBasicBlock;
+class MipsTargetStreamer;
class Module;
class raw_ostream;
class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
+ MipsTargetStreamer &getTargetStreamer();
void EmitInstrWithMacroNoAT(const MachineInstr *MI);
@@ -40,6 +42,16 @@ private:
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+ /// InConstantPool - Maintain state when emitting a sequence of constant
+ /// pool entries so we can properly mark them as data regions.
+ bool InConstantPool;
+
+ bool UsingConstantPools;
+
public:
const MipsSubtarget *Subtarget;
@@ -47,8 +59,11 @@ public:
MipsMCInstLower MCInstLowering;
explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), MCInstLowering(*this) {
+ : AsmPrinter(TM, Streamer), MCP(0), InConstantPool(false),
+ MCInstLowering(*this) {
Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ UsingConstantPools =
+ (Subtarget->inMips16Mode() && Subtarget->useConstantIslands());
}
virtual const char *getPassName() const {
@@ -57,6 +72,12 @@ public:
virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual void EmitConstantPool() LLVM_OVERRIDE {
+ if (!UsingConstantPools)
+ AsmPrinter::EmitConstantPool();
+ // we emit constant pools ourselves.
+ }
+
void EmitInstruction(const MachineInstr *MI);
void printSavedRegsBitmask(raw_ostream &O);
void printHex32(unsigned int Value, raw_ostream &O);
@@ -75,6 +96,7 @@ public:
raw_ostream &O);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm8(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index ac40b11..66391cb 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -26,8 +26,10 @@ def RetCC_MipsO32 : CallingConv<[
// f32 are returned in registers F0, F2
CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
- // f64 are returned in register D0, D1
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0, D1]>>>
+ // f64 arguments are returned in D0_64 and D1_64 in FP64bit mode or
+ // in D0 and D1 in FP32bit mode.
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D1_64]>>>,
+ CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()", CCAssignToReg<[D0, D1]>>>
]>;
//===----------------------------------------------------------------------===//
@@ -149,7 +151,16 @@ def RetCC_MipsEABI : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
// f64 arguments are passed in double-precision floating point registers.
- CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>,
+ CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()",
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9]>>>,
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()",
+ CCAssignToReg<[D0_64, D1_64, D2_64, D3_64,
+ D4_64, D5_64, D6_64, D7_64,
+ D8_64, D9_64, D10_64, D11_64,
+ D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64,
+ D19_64]>>>,
// Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
CCIfType<[f64], CCAssignToStack<8, 8>>
@@ -224,6 +235,9 @@ def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
(sequence "S%u", 7, 0))>;
+def CSR_O32_FP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 20), RA, FP,
+ (sequence "S%u", 7, 0))>;
+
def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64,
D23_64, D22_64, D21_64, RA_64, FP_64, GP_64,
(sequence "S%u_64", 7, 0))>;
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 813037e..ca4163d 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -105,11 +105,16 @@ private:
const MachineOperand &MO) const;
unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getJumpTargetOpValueMM(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getBranchTargetOpValueMM(const MachineInstr &MI,
+ unsigned OpNo) const;
unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const;
void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
int Offset) const;
@@ -186,6 +191,18 @@ unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI,
return 0;
}
+unsigned MipsCodeEmitter::getJumpTargetOpValueMM(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
+unsigned MipsCodeEmitter::getBranchTargetOpValueMM(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
unsigned OpNo) const {
MachineOperand MO = MI.getOperand(OpNo);
@@ -201,6 +218,12 @@ unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI,
return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
}
+unsigned MipsCodeEmitter::getMemEncodingMMImm12(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI,
unsigned OpNo) const {
// size is encoded as size-1.
@@ -214,6 +237,12 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
}
+unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
@@ -316,6 +345,14 @@ bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO)
.addReg(Mips::ZERO).addImm(0);
break;
+ case Mips::B:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BEQ)).addReg(Mips::ZERO)
+ .addReg(Mips::ZERO).addOperand(MI->getOperand(0));
+ break;
+ case Mips::TRAP:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BREAK)).addImm(0)
+ .addImm(0);
+ break;
case Mips::JALRPseudo:
BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
.addReg(MI->getOperand(0).getReg());
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 39862b3..2de1430 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -19,7 +19,7 @@
class CMov_I_I_FT<string opstr, RegisterOperand CRC, RegisterOperand DRC,
InstrItinClass Itin> :
InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
- !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> {
+ !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR, opstr> {
let Constraints = "$F = $rd";
}
@@ -37,7 +37,7 @@ class CMov_F_I_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
InstSE<(outs RC:$rd), (ins RC:$rs, FCCRegsOpnd:$fcc, RC:$F),
!strconcat(opstr, "\t$rd, $rs, $fcc"),
[(set RC:$rd, (OpNode RC:$rs, FCCRegsOpnd:$fcc, RC:$F))],
- Itin, FrmFR> {
+ Itin, FrmFR, opstr> {
let Constraints = "$F = $rd";
}
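The "$F = $rd" constraint exists because these are conditional moves: the destination keeps its previous value when the condition fails, so the old value has to be passed in as the $F operand. A minimal C++ model of the movz/movn semantics (not the instruction definitions themselves):

#include <cassert>
#include <cstdint>

// movz rd, rs, rt : rd = (rt == 0) ? rs : rd (old value supplied as $F)
static uint32_t movz(uint32_t rs, uint32_t rt, uint32_t rd_old) {
  return rt == 0 ? rs : rd_old;
}
// movn rd, rs, rt : rd = (rt != 0) ? rs : rd
static uint32_t movn(uint32_t rs, uint32_t rt, uint32_t rd_old) {
  return rt != 0 ? rs : rd_old;
}

int main() {
  assert(movz(7, 0, 42) == 7);   // condition holds, take rs
  assert(movz(7, 1, 42) == 42);  // condition fails, keep old rd
  assert(movn(7, 1, 42) == 7);
  assert(movn(7, 0, 42) == 42);
  return 0;
}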
@@ -103,91 +103,94 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
}
// Instantiation of instructions.
-def MOVZ_I_I : CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, NoItinerary>,
+def MOVZ_I_I : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, IIArith>,
ADD_FM<0, 0xa>;
let Predicates = [HasStdEnc], isCodeGenOnly = 1 in {
- def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd,
- NoItinerary>, ADD_FM<0, 0xa>;
- def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd,
- NoItinerary>, ADD_FM<0, 0xa>;
- def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd,
- NoItinerary>, ADD_FM<0, 0xa>;
+ def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, IIArith>,
+ ADD_FM<0, 0xa>;
+ def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, IIArith>,
+ ADD_FM<0, 0xa>;
+ def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, IIArith>,
+ ADD_FM<0, 0xa>;
}
-def MOVN_I_I : CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd,
- NoItinerary>, ADD_FM<0, 0xb>;
+def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, IIArith>,
+ ADD_FM<0, 0xb>;
let Predicates = [HasStdEnc], isCodeGenOnly = 1 in {
- def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd,
- NoItinerary>, ADD_FM<0, 0xb>;
- def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd,
- NoItinerary>, ADD_FM<0, 0xb>;
- def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd,
- NoItinerary>, ADD_FM<0, 0xb>;
+ def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, IIArith>,
+ ADD_FM<0, 0xb>;
+ def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, IIArith>,
+ ADD_FM<0, 0xb>;
+ def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, IIArith>,
+ ADD_FM<0, 0xb>;
}
-def MOVZ_I_S : CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32RegsOpnd, IIFmove>,
+def MOVZ_I_S : CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, IIFmove>,
CMov_I_F_FM<18, 16>;
let isCodeGenOnly = 1 in
-def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32RegsOpnd, IIFmove>,
+def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, IIFmove>,
CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]>;
-def MOVN_I_S : CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32RegsOpnd, IIFmove>,
+def MOVN_I_S : CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, IIFmove>,
CMov_I_F_FM<19, 16>;
let isCodeGenOnly = 1 in
-def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32RegsOpnd, IIFmove>,
+def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, IIFmove>,
CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]>;
let Predicates = [NotFP64bit, HasStdEnc] in {
- def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64RegsOpnd, IIFmove>,
+ def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, IIFmove>,
CMov_I_F_FM<18, 17>;
- def MOVN_I_D32 : CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64RegsOpnd, IIFmove>,
+ def MOVN_I_D32 : CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, IIFmove>,
CMov_I_F_FM<19, 17>;
}
-let Predicates = [IsFP64bit, HasStdEnc], isCodeGenOnly = 1 in {
- def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64RegsOpnd, IIFmove>,
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, IIFmove>,
CMov_I_F_FM<18, 17>;
- def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64RegsOpnd,
- IIFmove>, CMov_I_F_FM<18, 17>;
- def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64RegsOpnd, IIFmove>,
+ def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, IIFmove>,
CMov_I_F_FM<19, 17>;
- def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64RegsOpnd,
- IIFmove>, CMov_I_F_FM<19, 17>;
+ let isCodeGenOnly = 1 in {
+ def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd,
+ IIFmove>, CMov_I_F_FM<18, 17>;
+ def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd,
+ IIFmove>, CMov_I_F_FM<19, 17>;
+ }
}
-def MOVT_I : CMov_F_I_FT<"movt", GPR32Opnd, IIArith, MipsCMovFP_T>,
+def MOVT_I : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, IIArith, MipsCMovFP_T>,
CMov_F_I_FM<1>;
let isCodeGenOnly = 1 in
def MOVT_I64 : CMov_F_I_FT<"movt", GPR64Opnd, IIArith, MipsCMovFP_T>,
CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]>;
-def MOVF_I : CMov_F_I_FT<"movf", GPR32Opnd, IIArith, MipsCMovFP_F>,
+def MOVF_I : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, IIArith, MipsCMovFP_F>,
CMov_F_I_FM<0>;
let isCodeGenOnly = 1 in
def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, IIArith, MipsCMovFP_F>,
CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]>;
-def MOVT_S : CMov_F_F_FT<"movt.s", FGR32RegsOpnd, IIFmove, MipsCMovFP_T>,
+def MOVT_S : CMov_F_F_FT<"movt.s", FGR32Opnd, IIFmove, MipsCMovFP_T>,
CMov_F_F_FM<16, 1>;
-def MOVF_S : CMov_F_F_FT<"movf.s", FGR32RegsOpnd, IIFmove, MipsCMovFP_F>,
+def MOVF_S : CMov_F_F_FT<"movf.s", FGR32Opnd, IIFmove, MipsCMovFP_F>,
CMov_F_F_FM<16, 0>;
let Predicates = [NotFP64bit, HasStdEnc] in {
- def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64RegsOpnd, IIFmove, MipsCMovFP_T>,
+ def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64Opnd, IIFmove, MipsCMovFP_T>,
CMov_F_F_FM<17, 1>;
- def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64RegsOpnd, IIFmove, MipsCMovFP_F>,
+ def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64Opnd, IIFmove, MipsCMovFP_F>,
CMov_F_F_FM<17, 0>;
}
-let Predicates = [IsFP64bit, HasStdEnc], isCodeGenOnly = 1 in {
- def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64RegsOpnd, IIFmove, MipsCMovFP_T>,
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, IIFmove, MipsCMovFP_T>,
CMov_F_F_FM<17, 1>;
- def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64RegsOpnd, IIFmove, MipsCMovFP_F>,
+ def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, IIFmove, MipsCMovFP_F>,
CMov_F_F_FM<17, 0>;
}
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index bda0167..c46bbac 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -9,9 +9,7 @@
//
//
// This pass is used to make PC-relative loads of constants.
-// For now, only Mips16 will use this. While it has the same name and
-// uses many ideas from the LLVM ARM Constant Island Pass, it's not intended
-// to reuse any of the code from the ARM version.
+// For now, only Mips16 will use this.
//
// Loading constants inline is expensive on Mips16 and it's in general better
// to place the constant nearby in code space and then it can be loaded with a
@@ -27,31 +25,244 @@
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips16InstrInfo.h"
+#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Format.h"
+#include <algorithm>
using namespace llvm;
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+
+// FIXME: This option should be removed once it has received sufficient testing.
+static cl::opt<bool>
+AlignConstantIslands("mips-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+
+// Rather than running make-check tests with huge amounts of code, we force
+// the test to use this amount.
+//
+static cl::opt<int> ConstantIslandsSmallOffset(
+ "mips-constant-islands-small-offset",
+ cl::init(0),
+ cl::desc("Make small offsets be this amount for testing purposes"),
+ cl::Hidden);
+
+//
+// For testing purposes we tell it to not use relaxed load forms so that it
+// will split blocks.
+//
+static cl::opt<bool> NoLoadRelaxation(
+ "mips-constant-islands-no-load-relaxation",
+ cl::init(false),
+ cl::desc("Don't relax loads to long loads - for testing purposes"),
+ cl::Hidden);
+
+
namespace {
+
+
typedef MachineBasicBlock::iterator Iter;
typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ /// MipsConstantIslands - Due to limited PC-relative displacements, Mips
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+
class MipsConstantIslands : public MachineFunctionPass {
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ // FIXME: ignore LogAlign for this patch
+ //
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ return PO;
+ }
+
+ BasicBlockInfo() : Offset(0), Size(0) {}
+
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ private:
+ unsigned MaxDisp;
+ unsigned LongFormMaxDisp; // mips16 has 16/32 bit instructions
+ // with different displacements
+ unsigned LongFormOpcode;
+ public:
+ bool NegOk;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg,
+ unsigned longformmaxdisp, unsigned longformopcode)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp),
+ LongFormMaxDisp(longformmaxdisp), LongFormOpcode(longformopcode),
+ NegOk(neg){
+ HighWaterMark = CPEMI->getParent();
+ }
+ /// getMaxDisp - Returns the maximum displacement supported by MI.
+ unsigned getMaxDisp() const {
+ unsigned xMaxDisp = ConstantIslandsSmallOffset?
+ ConstantIslandsSmallOffset: MaxDisp;
+ return xMaxDisp;
+ }
+ void setMaxDisp(unsigned val) {
+ MaxDisp = val;
+ }
+ unsigned getLongFormMaxDisp() const {
+ return LongFormMaxDisp;
+ }
+ unsigned getLongFormOpcode() const {
+ return LongFormOpcode;
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ const TargetMachine &TM;
+ bool IsPIC;
+ unsigned ABI;
+ const MipsSubtarget *STI;
+ const Mips16InstrInfo *TII;
+ MipsFunctionInfo *MFI;
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
+
+ unsigned PICLabelUId;
+ bool PrescannedForConstants;
+
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
+ }
+
+
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
+ }
+
public:
static char ID;
MipsConstantIslands(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm),
IsPIC(TM.getRelocationModel() == Reloc::PIC_),
- ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()) {}
+ ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
+ STI(&TM.getSubtarget<MipsSubtarget>()), MF(0), MCP(0),
+ PrescannedForConstants(false){}
virtual const char *getPassName() const {
return "Mips Constant Islands";
@@ -59,26 +270,1264 @@ namespace {
bool runOnMachineFunction(MachineFunction &F);
+ void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser&) const;
+ void dumpBBs();
+ void verify();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U);
+
+ bool isLongFormOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ int findLongFormInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ bool findAvailableWater(CPUser&U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U, unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+ bool fixupUnconditionalBr(ImmBranch &Br);
+
+ void prescanForConstants();
+
private:
- const TargetMachine &TM;
- bool IsPIC;
- unsigned ABI;
+
};
char MipsConstantIslands::ID = 0;
} // end of anonymous namespace
+
+bool MipsConstantIslands::isLongFormOffsetInRange
+ (unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getLongFormMaxDisp(), U.NegOk);
+}
+
+bool MipsConstantIslands::isOffsetInRange
+ (unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getMaxDisp(), U.NegOk);
+}
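The four-argument isOffsetInRange overload is not part of this hunk, so the following is only a guess at the usual shape of such a check (forward displacements up to MaxDisp, backward ones only when NegOk), shown as a standalone sketch:

#include <cassert>

// Guess at the unshown four-argument check: a trial (CPE) offset is reachable
// from a user offset when the forward distance fits in MaxDisp, or, if NegOk,
// when the backward distance does.
static bool offsetInRange(unsigned UserOffset, unsigned TrialOffset,
                          unsigned MaxDisp, bool NegOk) {
  if (TrialOffset >= UserOffset)
    return TrialOffset - UserOffset <= MaxDisp;
  return NegOk && UserOffset - TrialOffset <= MaxDisp;
}

int main() {
  assert(offsetInRange(100, 160, 64, false));   // 60 bytes forward, fits
  assert(!offsetInRange(100, 40, 64, false));   // backward, not allowed
  assert(offsetInRange(100, 40, 64, true));     // backward, allowed with NegOk
  return 0;
}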
+/// print block size and offset information - debugging
+void MipsConstantIslands::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
/// createMipsConstantIslandPass - Returns a pass that places constant pool
/// entries (islands) close to their Mips16 users.
FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
return new MipsConstantIslands(tm);
}
-bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
+bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// The intention is for this to be a mips16 only pass for now
// FIXME:
- // if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode())
- // return false;
+ MF = &mf;
+ MCP = mf.getConstantPool();
+ DEBUG(dbgs() << "constant island machine function " << "\n");
+ if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode() ||
+ !MipsSubtarget::useConstantIslands()) {
+ return false;
+ }
+ TII = (const Mips16InstrInfo*)MF->getTarget().getInstrInfo();
+ MFI = MF->getInfo<MipsFunctionInfo>();
+ DEBUG(dbgs() << "constant island processing " << "\n");
+ //
+ // will need to make a predetermination of whether there are any constants
+ // we need to put in constant islands. TBD.
+ //
+ if (!PrescannedForConstants) prescanForConstants();
+
+ HasFarJump = false;
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ bool MadeChange = false;
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
+
+ /// The next UID to take is the first unused one.
+ initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ initializeFunctionInfo(CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+ /// Remove dead constant pool entries.
+ MadeChange |= removeUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ (void)NoBRIters;
+ while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= handleConstantPoolUser(i);
+ if (CPChange && ++NoCPIters > 30)
+ report_fatal_error("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ return MadeChange;
+}
+
+/// doInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void
+MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->ensureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
+
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
+
+ MachineInstr *CPEMI =
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(Mips::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+
+ CPEMIs.push_back(CPEMI);
+
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+ << Size << ", align = " << Align <<'\n');
+ }
+ DEBUG(BB->dump());
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fall
+/// through into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
return false;
}
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+MipsConstantIslands::CPEntry
+*MipsConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned MipsConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == Mips::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
+/// initializeFunctionInfo - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void MipsConstantIslands::
+initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // Compute block offsets.
+ adjustBBOffsetsAfter(MF->begin());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ int Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other branches for now
+ case Mips::Bimm16:
+ Bits = 11;
+ Scale = 2;
+ isCond = false;
+ break;
+ case Mips::BimmX16:
+ Bits = 16;
+ Scale = 2;
+ isCond = false;
+ }
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
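+ // e.g. Bimm16: Bits = 11, Scale = 2 => MaxOffs = ((1 << 10) - 1) * 2 = 2046;
+ // BimmX16: Bits = 16, Scale = 2 => MaxOffs = ((1 << 15) - 1) * 2 = 65534.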
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == Mips::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ unsigned LongFormBits = 0;
+ unsigned LongFormScale = 0;
+ unsigned LongFormOpcode = 0;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+ case Mips::LwRxPcTcp16:
+ Bits = 8;
+ Scale = 4;
+ LongFormOpcode = Mips::LwRxPcTcpX16;
+ LongFormBits = 16;
+ LongFormScale = 1;
+ break;
+ case Mips::LwRxPcTcpX16:
+ Bits = 16;
+ Scale = 1;
+ NegOk = true;
+ break;
+ }
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ unsigned LongFormMaxOffs = ((1 << LongFormBits)-1) * LongFormScale;
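+ // e.g. LwRxPcTcp16: Bits = 8, Scale = 4 => MaxOffs = 255 * 4 = 1020 bytes,
+ // and its long form LwRxPcTcpX16: Bits = 16, Scale = 1 => 65535 bytes.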
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk,
+ LongFormMaxOffs, LongFormOpcode));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry; don't bother scanning the
+ // rest of the operands.
+ break;
+
+ }
+
+ }
+ }
+
+}
+
+/// computeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void MipsConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ BBI.Size += TII->GetInstSizeInBytes(I);
+
+}
+
+/// getOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned MipsConstantIslands::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+ return Offset;
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// updateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void MipsConstantIslands::updateForInsertedWaterBlock
+ (MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add NewBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+unsigned MipsConstantIslands::getUserOffset(CPUser &U) const {
+ return getOffsetOf(U.MI);
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Updates data structures and renumbers blocks to
+/// account for this change, and returns the newly created block.
+MachineBasicBlock *MipsConstantIslands::splitBlockBeforeInstr
+ (MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc(), TII->get(Mips::Bimm16)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as updateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add OrigBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool MipsConstantIslands::isOffsetInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK) {
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// isWaterInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
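+ // e.g. if CPEEnd = 0x1046 and the next block needs 4-byte alignment
+ // (NextBlockAlignment = 2), OffsetToAlignment adds 2 bytes of padding.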
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth;
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
+}
+
+/// isCPEntryInRange - Returns true if the distance between the specified MI
+/// and the specified constant pool entry instruction can fit in MI's
+/// displacement field.
+bool MipsConstantIslands::isCPEntryInRange
+ (MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+
+ if (DoDump) {
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
+ }
+
+ return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == Mips::Bimm16)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif
+
+void MipsConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset at the end of the layout predecessor.
+ unsigned Offset = BBInfo[i - 1].Offset + BBInfo[i - 1].Size;
+ BBInfo[i].Offset = Offset;
+ }
+}
+
+/// decrementCPEReferenceCount - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0, remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+bool MipsConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ removeDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// findInRangeCPEntry - see if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int MipsConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// findLongFormInRangeCPEntry - see if the currently referenced CPE is in
+/// range; this version checks whether the longer form of the instruction
+/// can be used to satisfy things.
+/// If not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int MipsConstantIslands::findLongFormInRangeCPEntry
+ (CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI,
+ U.getLongFormMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
+ UserMI->setDesc(TII->get(U.getLongFormOpcode()));
+ U.setMaxDisp(U.getLongFormMaxDisp());
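+ // Returning 2 rather than 1 tells the caller that code size changed: the
+ // widened instruction shifts later offsets, so another iteration is needed.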
+ return 2; // instruction is longer length now
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI,
+ U.getLongFormMaxDisp(), U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case Mips::Bimm16:
+ return ((1<<10)-1)*2;
+ case Mips::BimmX16:
+ return ((1<<16)-1)*2;
+ default:
+ break;
+ }
+ return ((1<<16)-1)*2;
+}
+
+/// findAvailableWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry.
+/// To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
+bool MipsConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
+ return true;
+ }
+ if (IP == B)
+ break;
+ }
+ return BestGrowth != ~0u;
+}
+
+/// createNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there.
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of the branch we are about to insert (a 2-byte Bimm16).
+ unsigned Delta = 2;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = Mips::Bimm16;
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
+
+ // What a big block. Find a place within the block to split it.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign which is the largest possible alignment in the function.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // The 4 in the following is for the unconditional branch we'll be inserting.
+ // Alignment of the island is handled inside isOffsetInRange.
+ BaseInsertOffset -= 4;
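+ // Illustrative: with UserOffset = 0x100 and MaxDisp = 1020 (LwRxPcTcp16),
+ // BaseInsertOffset becomes 0x100 + 1020 - 4 = 0x4f8.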
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ BaseInsertOffset = UserBBI.postOffset() - 8;
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ //MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+ // Shift insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+ }
+
+ --MI;
+ NewMBB = splitBlockBeforeInstr(MI);
+}
+
+/// handleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once; it's expensive.
+ unsigned UserOffset = getUserOffset(U);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = findInRangeCPEntry(U, UserOffset);
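+ // result == 1: an in-range entry (or clone) was found and nothing moved;
+ // result == 2: an entry was found but addresses changed, so iterate again.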
+ if (result == 1) return false;
+ else if (result == 2) return true;
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (findAvailableWater(U, UserOffset, IP)) {
+ DEBUG(dbgs() << "Found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.erase(WaterBB))
+ NewWaterList.insert(NewIsland);
+
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+ // No water found.
+ // We first see if a longer form of the instruction could have reached
+ // the constant; in that case we won't bother to split.
+ if (!NoLoadRelaxation) {
+ result = findLongFormInRangeCPEntry(U, UserOffset);
+ if (result != 0) return true;
+ }
+ DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF->insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ updateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = createPICLabelUId();
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(Mips::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
+ // Increase the size of the island block to account for the new entry.
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
+
+ return true;
+}
+
+/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void MipsConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBInfo[CPEBB->getNumber()].Size -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool MipsConstantIslands::removeUnusedCPEntries() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ removeDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// isBBInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool MipsConstantIslands::isBBInRange
+ (MachineInstr *MI, MachineBasicBlock *DestBB, unsigned MaxDisp) {
+ unsigned PCAdj = 4;
+
+ unsigned BrOffset = getOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool MipsConstantIslands::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return fixupUnconditionalBr(Br);
+ return fixupConditionalBr(Br);
+}
+
+/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. Relax it to the long-form
+/// unconditional branch (BimmX16), which has a larger displacement field.
+bool
+MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ // Widen the branch to its long form (BimmX16) to implement the far jump.
+ Br.MaxDisp = ((1 << 16)-1) * 2;
+ MI->setDesc(TII->get(Mips::BimmX16));
+ BBInfo[MBB->getNumber()].Size += 2;
+ adjustBBOffsetsAfter(MBB);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
+
+/// fixupConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ unsigned CCReg = 0; // FIXME
+ unsigned CC = 0; // FIXME
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ splitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
+
+
+void MipsConstantIslands::prescanForConstants() {
+ unsigned J = 0;
+ (void)J;
+ PrescannedForConstants = true;
+ for (MachineFunction::iterator B =
+ MF->begin(), E = MF->end(); B != E; ++B) {
+ for (MachineBasicBlock::instr_iterator I =
+ B->instr_begin(), EB = B->instr_end(); I != EB; ++I) {
+ switch(I->getDesc().getOpcode()) {
+ case Mips::LwConstant32: {
+ DEBUG(dbgs() << "constant island constant " << *I << "\n");
+ J = I->getNumOperands();
+ DEBUG(dbgs() << "num operands " << J << "\n");
+ MachineOperand& Literal = I->getOperand(1);
+ if (Literal.isImm()) {
+ int64_t V = Literal.getImm();
+ DEBUG(dbgs() << "literal " << V << "\n");
+ Type *Int32Ty =
+ Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, V);
+ unsigned index = MCP->getConstantPoolIndex(C, 4);
+ I->getOperand(2).ChangeToImmediate(index);
+ DEBUG(dbgs() << "constant island constant " << *I << "\n");
+ I->setDesc(TII->get(Mips::LwRxPcTcp16));
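+ // Drop the old literal operand and the operand after it; calling
+ // RemoveOperand(1) twice works because operands shift down after each removal.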
+ I->RemoveOperand(1);
+ I->RemoveOperand(1);
+ I->addOperand(MachineOperand::CreateCPI(index, 0));
+ I->addOperand(MachineOperand::CreateImm(4));
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+}
+
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index 526821a..d268384 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -256,236 +256,235 @@ class PREPEND_ENC : APPEND_FMT<0b00001>;
// Instruction desc.
class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCS, RegisterClass RCT = RCS> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS, RegisterOperand ROT = ROS> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
}
class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCS = RCD> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCS:$rs);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS = ROD> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROS:$rs);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs))];
InstrItinClass Itinerary = itin;
}
class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCS,
- RegisterClass RCT = RCS> {
+ InstrItinClass itin, RegisterOperand ROS,
+ RegisterOperand ROT = ROS> {
dag OutOperandList = (outs);
- dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rs, $rt");
- list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)];
+ list<dag> Pattern = [(OpNode ROS:$rs, ROT:$rt)];
InstrItinClass Itinerary = itin;
}
class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCS, RegisterClass RCT = RCS> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS, RegisterOperand ROT = ROS> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
}
class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCT,
- RegisterClass RCS = RCT> {
- dag OutOperandList = (outs RCT:$rt);
- dag InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src);
+ InstrItinClass itin, RegisterOperand ROT,
+ RegisterOperand ROS = ROT> {
+ dag OutOperandList = (outs ROT:$rt);
+ dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
- list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))];
+ list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
}
class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCT = RCD> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCT:$rt);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROT = ROD> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROT:$rt))];
InstrItinClass Itinerary = itin;
}
class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- ImmLeaf immPat, InstrItinClass itin, RegisterClass RC> {
- dag OutOperandList = (outs RC:$rd);
+ ImmLeaf immPat, InstrItinClass itin, RegisterOperand RO> {
+ dag OutOperandList = (outs RO:$rd);
dag InOperandList = (ins uimm16:$imm);
string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
- list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))];
+ list<dag> Pattern = [(set RO:$rd, (OpNode immPat:$imm))];
InstrItinClass Itinerary = itin;
}
class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RC> {
- dag OutOperandList = (outs RC:$rd);
- dag InOperandList = (ins RC:$rt, GPR32:$rs_sa);
+ InstrItinClass itin, RegisterOperand RO> {
+ dag OutOperandList = (outs RO:$rd);
+ dag InOperandList = (ins RO:$rt, GPR32Opnd:$rs_sa);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
- list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, GPR32:$rs_sa))];
+ list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs_sa))];
InstrItinClass Itinerary = itin;
}
class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
SDPatternOperator ImmPat, InstrItinClass itin,
- RegisterClass RC> {
- dag OutOperandList = (outs RC:$rd);
- dag InOperandList = (ins RC:$rt, uimm16:$rs_sa);
+ RegisterOperand RO> {
+ dag OutOperandList = (outs RO:$rd);
+ dag InOperandList = (ins RO:$rt, uimm16:$rs_sa);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
- list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))];
+ list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, ImmPat:$rs_sa))];
InstrItinClass Itinerary = itin;
bit hasSideEffects = 1;
}
class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rd);
- dag InOperandList = (ins GPR32:$base, GPR32:$index);
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins PtrRC:$base, PtrRC:$index);
string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})");
- list<dag> Pattern = [(set GPR32:$rd,
- (OpNode GPR32:$base, GPR32:$index))];
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode iPTR:$base, iPTR:$index))];
InstrItinClass Itinerary = itin;
bit mayLoad = 1;
}
class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCS = RCD, RegisterClass RCT = RCD> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS = ROD, RegisterOperand ROT = ROD> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
}
class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
SDPatternOperator ImmOp, InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rt);
- dag InOperandList = (ins GPR32:$rs, shamt:$sa, GPR32:$src);
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$sa, GPR32Opnd:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
- list<dag> Pattern = [(set GPR32:$rt,
- (OpNode GPR32:$src, GPR32:$rs, ImmOp:$sa))];
+ list<dag> Pattern = [(set GPR32Opnd:$rt,
+ (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, ImmOp:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
}
class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rt);
- dag InOperandList = (ins ACRegsDSP:$ac, GPR32:$shift_rs);
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
}
class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rt);
- dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm16:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
}
class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins simm16:$shift, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode immSExt6:$shift, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode immSExt6:$shift, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
}
class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode GPR32:$rs, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
}
class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode GPR32:$rs, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
}
class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rd);
+ dag OutOperandList = (outs GPR32Opnd:$rd);
dag InOperandList = (ins uimm16:$mask);
string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
- list<dag> Pattern = [(set GPR32:$rd, (OpNode immZExt10:$mask))];
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
InstrItinClass Itinerary = itin;
}
class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs);
- dag InOperandList = (ins GPR32:$rs, uimm16:$mask);
+ dag InOperandList = (ins GPR32Opnd:$rs, uimm16:$mask);
string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
- list<dag> Pattern = [(OpNode GPR32:$rs, immZExt10:$mask)];
+ list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
InstrItinClass Itinerary = itin;
}
class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
}
class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, GPR32:$rt);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
- list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode GPR32:$rs, GPR32:$rt))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt))];
InstrItinClass Itinerary = itin;
- int AddedComplexity = 20;
bit isCommutable = 1;
}
class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))];
InstrItinClass Itinerary = itin;
- int AddedComplexity = 20;
string Constraints = "$acin = $ac";
}
-class MFHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rd);
- dag InOperandList = (ins RC:$ac);
+class MFHI_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins RO:$ac);
string AsmString = !strconcat(instr_asm, "\t$rd, $ac");
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode RO:$ac))];
InstrItinClass Itinerary = itin;
}
-class MTHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
- dag OutOperandList = (outs RC:$ac);
- dag InOperandList = (ins GPR32:$rs);
+class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin> {
+ dag OutOperandList = (outs RO:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
InstrItinClass Itinerary = itin;
}
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
- MipsPseudo<(outs GPR32:$dst), (ins), [(set GPR32:$dst, (OpNode))]> {
+ MipsPseudo<(outs GPR32Opnd:$dst), (ins), [(set GPR32Opnd:$dst, (OpNode))]> {
bit usesCustomInserter = 1;
}
@@ -501,10 +500,10 @@ class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> {
class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rt);
- dag InOperandList = (ins GPR32:$src, GPR32:$rs);
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins GPR32Opnd:$src, GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
- list<dag> Pattern = [(set GPR32:$rt, (OpNode GPR32:$src, GPR32:$rs))];
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
}
@@ -515,209 +514,209 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
// Addition/subtraction
class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", null_frag, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable,
+ DSPROpnd, DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag20]>;
class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag20]>;
class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary,
- DSPRegs, DSPRegs>,
+ DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable,
+ DSPROpnd, DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag20]>;
class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag20]>;
class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary,
- DSPRegs, DSPRegs>,
+ DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w,
- NoItinerary, GPR32, GPR32>,
+ NoItinerary, GPR32Opnd, GPR32Opnd>,
IsCommutable, Defs<[DSPOutFlag20]>;
class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w,
- NoItinerary, GPR32, GPR32>,
+ NoItinerary, GPR32Opnd, GPR32Opnd>,
Defs<[DSPOutFlag20]>;
class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary,
- GPR32, GPR32>, IsCommutable,
+ GPR32Opnd, GPR32Opnd>, IsCommutable,
Defs<[DSPCarry]>;
class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary,
- GPR32, GPR32>,
+ GPR32Opnd, GPR32Opnd>,
IsCommutable, Uses<[DSPCarry]>, Defs<[DSPOutFlag20]>;
class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary,
- GPR32, GPR32>;
+ GPR32Opnd, GPR32Opnd>;
class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
// Absolute value
class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w,
- NoItinerary, GPR32>,
+ NoItinerary, GPR32Opnd>,
Defs<[DSPOutFlag20]>;
// Precision reduce/expand
class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph",
int_mips_precrq_qb_ph,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPROpnd, DSPROpnd>;
class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w",
int_mips_precrq_ph_w,
- NoItinerary, DSPRegs, GPR32>;
+ NoItinerary, DSPROpnd, GPR32Opnd>;
class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w",
int_mips_precrq_rs_ph_w,
- NoItinerary, DSPRegs,
- GPR32>,
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>,
Defs<[DSPOutFlag22]>;
class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph",
int_mips_precrqu_s_qb_ph,
- NoItinerary, DSPRegs,
- DSPRegs>,
+ NoItinerary, DSPROpnd,
+ DSPROpnd>,
Defs<[DSPOutFlag22]>;
class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl",
int_mips_preceq_w_phl,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr",
int_mips_preceq_w_phr,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl",
int_mips_precequ_ph_qbl,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr",
int_mips_precequ_ph_qbr,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla",
int_mips_precequ_ph_qbla,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra",
int_mips_precequ_ph_qbra,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl",
int_mips_preceu_ph_qbl,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr",
int_mips_preceu_ph_qbr,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla",
int_mips_preceu_ph_qbla,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
int_mips_preceu_ph_qbra,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
// Shift
class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
- immZExt4, NoItinerary, DSPRegs>,
+ immZExt4, NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
- immZExt4, NoItinerary, DSPRegs>;
+ immZExt4, NoItinerary, DSPROpnd>;
class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
- immZExt5, NoItinerary, GPR32>,
+ immZExt5, NoItinerary, GPR32Opnd>,
Defs<[DSPOutFlag22]>;
class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
- NoItinerary, GPR32>,
+ NoItinerary, GPR32Opnd>,
Defs<[DSPOutFlag22]>;
class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
- immZExt5, NoItinerary, GPR32>;
+ immZExt5, NoItinerary, GPR32Opnd>;
class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
- NoItinerary, GPR32>;
+ NoItinerary, GPR32Opnd>;
// Multiplication
class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl",
int_mips_muleu_s_ph_qbl,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag21]>;
class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr",
int_mips_muleu_s_ph_qbr,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag21]>;
class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl",
int_mips_muleq_s_w_phl,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag21]>;
class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr",
int_mips_muleq_s_w_phr,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag21]>;
class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag21]>;
class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
@@ -737,10 +736,10 @@ class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>,
Defs<[DSPOutFlag16_19]>;
// Move from/to hi/lo.
-class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HIRegsDSP, NoItinerary>;
-class MFLO_DESC : MFHI_DESC_BASE<"mflo", LORegsDSP, NoItinerary>;
-class MTHI_DESC : MTHI_DESC_BASE<"mthi", HIRegsDSP, NoItinerary>;
-class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LORegsDSP, NoItinerary>;
+class MFHI_DESC : MFHI_DESC_BASE<"mfhi", ACC64DSPOpnd, MipsMFHI, NoItinerary>;
+class MFLO_DESC : MFHI_DESC_BASE<"mflo", ACC64DSPOpnd, MipsMFLO, NoItinerary>;
+class MTHI_DESC : MTHI_DESC_BASE<"mthi", HI32DSPOpnd, NoItinerary>;
+class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LO32DSPOpnd, NoItinerary>;
// Dot product with accumulate/subtract
class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
@@ -773,67 +772,67 @@ class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
// Comparison
class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
int_mips_cmpu_eq_qb, NoItinerary,
- DSPRegs>,
+ DSPROpnd>,
IsCommutable, Defs<[DSPCCond]>;
class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb",
int_mips_cmpu_lt_qb, NoItinerary,
- DSPRegs>, Defs<[DSPCCond]>;
+ DSPROpnd>, Defs<[DSPCCond]>;
class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb",
int_mips_cmpu_le_qb, NoItinerary,
- DSPRegs>, Defs<[DSPCCond]>;
+ DSPROpnd>, Defs<[DSPCCond]>;
class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
int_mips_cmpgu_eq_qb,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
IsCommutable;
class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb",
int_mips_cmpgu_lt_qb,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb",
int_mips_cmpgu_le_qb,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
IsCommutable, Defs<[DSPCCond]>;
class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPCCond]>;
class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPCCond]>;
// Misc
class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev,
- NoItinerary, GPR32>;
+ NoItinerary, GPR32Opnd>;
class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPROpnd, DSPROpnd>;
class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
- NoItinerary, DSPRegs, GPR32>;
+ NoItinerary, DSPROpnd, GPR32Opnd>;
class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
- NoItinerary, DSPRegs, GPR32>;
+ NoItinerary, DSPROpnd, GPR32Opnd>;
class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Uses<[DSPCCond]>;
class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Uses<[DSPCCond]>;
class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>;
@@ -905,97 +904,97 @@ class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>,
// MIPS DSP Rev 2
// Addition/subtraction
class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable,
+ DSPROpnd, DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag20]>;
class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag20]>;
class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary,
- DSPRegs, DSPRegs>,
+ DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPROpnd>, IsCommutable;
class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPROpnd>, IsCommutable;
class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPROpnd>, IsCommutable;
class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPROpnd>, IsCommutable;
class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w,
- NoItinerary, GPR32>, IsCommutable;
+ NoItinerary, GPR32Opnd>, IsCommutable;
class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w,
- NoItinerary, GPR32>, IsCommutable;
+ NoItinerary, GPR32Opnd>, IsCommutable;
class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w,
- NoItinerary, GPR32>;
+ NoItinerary, GPR32Opnd>;
class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w,
- NoItinerary, GPR32>;
+ NoItinerary, GPR32Opnd>;
// Comparison
class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb",
int_mips_cmpgdu_eq_qb,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
IsCommutable, Defs<[DSPCCond]>;
class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb",
int_mips_cmpgdu_lt_qb,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
Defs<[DSPCCond]>;
class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb",
int_mips_cmpgdu_le_qb,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
Defs<[DSPCCond]>;
// Absolute
class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag20]>;
// Multiplication
class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary,
- DSPRegs>, IsCommutable,
+ DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag21]>;
class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
- NoItinerary, DSPRegs>, IsCommutable,
+ NoItinerary, DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag21]>;
class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w,
- NoItinerary, GPR32>, IsCommutable,
+ NoItinerary, GPR32Opnd>, IsCommutable,
Defs<[DSPOutFlag21]>;
class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w,
- NoItinerary, GPR32>, IsCommutable,
+ NoItinerary, GPR32Opnd>, IsCommutable,
Defs<[DSPOutFlag21]>;
class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag21]>;
// Dot product with accumulate/subtract
@@ -1026,36 +1025,36 @@ class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
// Precision reduce/expand
class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
int_mips_precr_qb_ph,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPROpnd, DSPROpnd>;
class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w",
int_mips_precr_sra_ph_w,
- NoItinerary, DSPRegs,
- GPR32>;
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>;
class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
int_mips_precr_sra_r_ph_w,
- NoItinerary, DSPRegs,
- GPR32>;
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>;
// Shift
class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
- immZExt3, NoItinerary, DSPRegs>;
+ immZExt3, NoItinerary, DSPROpnd>;
class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
// Misc
class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
@@ -1240,24 +1239,24 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC;
}
// Pseudos.
-let isPseudo = 1 in {
+let isPseudo = 1, isCodeGenOnly = 1 in {
// Pseudo instructions for loading and storing accumulator registers.
- defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSPOpnd>;
- defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSPOpnd>;
+ def LOAD_ACC64DSP : Load<"", ACC64DSPOpnd>;
+ def STORE_ACC64DSP : Store<"", ACC64DSPOpnd>;
// Pseudos for loading and storing ccond field of DSP control register.
- defm LOAD_CCOND_DSP : LoadM<"load_ccond_dsp", DSPCC>;
- defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>;
+ def LOAD_CCOND_DSP : Load<"load_ccond_dsp", DSPCC>;
+ def STORE_CCOND_DSP : Store<"store_ccond_dsp", DSPCC>;
}
// Pseudo CMP and PICK instructions.
class PseudoCMP<Instruction RealInst> :
- PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>,
- PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects;
+ PseudoDSP<(outs DSPCC:$cmp), (ins DSPROpnd:$rs, DSPROpnd:$rt), []>,
+ PseudoInstExpansion<(RealInst DSPROpnd:$rs, DSPROpnd:$rt)>, NeverHasSideEffects;
class PseudoPICK<Instruction RealInst> :
- PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>,
- PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>,
+ PseudoDSP<(outs DSPROpnd:$rd), (ins DSPCC:$cmp, DSPROpnd:$rs, DSPROpnd:$rt), []>,
+ PseudoInstExpansion<(RealInst DSPROpnd:$rd, DSPROpnd:$rs, DSPROpnd:$rt)>,
NeverHasSideEffects;
def PseudoCMP_EQ_PH : PseudoCMP<CMP_EQ_PH>;
@@ -1270,6 +1269,8 @@ def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
def PseudoPICK_PH : PseudoPICK<PICK_PH>;
def PseudoPICK_QB : PseudoPICK<PICK_QB>;
+def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+
// Patterns.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
Pat<pattern, result>, Requires<[pred]>;
@@ -1279,19 +1280,19 @@ class BitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC,
DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))),
(COPY_TO_REGCLASS SrcRC:$src, DstRC)>;
-def : BitconvertPat<i32, v2i16, GPR32, DSPRegs>;
-def : BitconvertPat<i32, v4i8, GPR32, DSPRegs>;
-def : BitconvertPat<v2i16, i32, DSPRegs, GPR32>;
-def : BitconvertPat<v4i8, i32, DSPRegs, GPR32>;
+def : BitconvertPat<i32, v2i16, GPR32, DSPR>;
+def : BitconvertPat<i32, v4i8, GPR32, DSPR>;
+def : BitconvertPat<v2i16, i32, DSPR, GPR32>;
+def : BitconvertPat<v4i8, i32, DSPR, GPR32>;
def : DSPPat<(v2i16 (load addr:$a)),
- (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+ (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>;
def : DSPPat<(v4i8 (load addr:$a)),
- (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
-def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
- (SW (COPY_TO_REGCLASS DSPRegs:$val, GPR32), addr:$a)>;
-def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
- (SW (COPY_TO_REGCLASS DSPRegs:$val, GPR32), addr:$a)>;
+ (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>;
+def : DSPPat<(store (v2i16 DSPR:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPR:$val, GPR32), addr:$a)>;
+def : DSPPat<(store (v4i8 DSPR:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPR:$val, GPR32), addr:$a)>;
// Binary operations.
class DSPBinPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node,
@@ -1336,7 +1337,7 @@ class DSPSetCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
CondCode CC> :
DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
(ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
- (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)),
+ (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPR)),
(ValTy ZERO)))>;
class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
@@ -1344,7 +1345,7 @@ class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
(ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
(ValTy ZERO),
- (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs))))>;
+ (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPR))))>;
class DSPSelectCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
CondCode CC> :
@@ -1384,12 +1385,12 @@ def : DSPSelectCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>;
// Extr patterns.
class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode GPR32:$rs, ACRegsDSP:$ac)),
- (Instr ACRegsDSP:$ac, GPR32:$rs)>;
+ DSPPat<(i32 (OpNode GPR32:$rs, ACC64DSP:$ac)),
+ (Instr ACC64DSP:$ac, GPR32:$rs)>;
class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)),
- (Instr ACRegsDSP:$ac, immZExt5:$shift)>;
+ DSPPat<(i32 (OpNode immZExt5:$shift, ACC64DSP:$ac)),
+ (Instr ACC64DSP:$ac, immZExt5:$shift)>;
def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
@@ -1404,11 +1405,6 @@ def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
-// mflo/hi patterns.
-let AddedComplexity = 20 in
-def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)),
- (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>;
-
// Indexed load patterns.
class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> :
DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))),
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 545a38d..ffbd83b 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -421,8 +421,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
return false;
if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
- return !PSV->PseudoSourceValue::isConstant(0) &&
- (V != PseudoSourceValue::getStack());
+ return !PSV->isConstant(0) && V != PseudoSourceValue::getStack();
return true;
}
@@ -563,14 +562,13 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
RegDU.init(*Slot);
- if (searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler)) {
- MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base());
- MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
- ++UsefulSlots;
- return true;
- }
+ if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler))
+ return false;
- return false;
+ MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base());
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
@@ -584,14 +582,13 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
RegDU.setCallerSaved(*Slot);
- if (searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler)) {
- MBB.splice(llvm::next(Slot), &MBB, Filler);
- MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
- ++UsefulSlots;
- return true;
- }
+ if (!searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler))
+ return false;
- return false;
+ MBB.splice(llvm::next(Slot), &MBB, Filler);
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
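Both delay-slot hunks above make the same transformation: the success path that used to sit inside an if block now runs unconditionally after an early return on failure. A minimal self-contained sketch of that guard-clause rewrite, with illustrative names rather than the filler's own:

    #include <vector>

    // Before: the useful work is nested inside the success branch and the
    // failure return trails at the bottom of the function.
    bool fillSlotNested(std::vector<int> &Slots, int Candidate) {
      if (Candidate >= 0) {
        Slots.push_back(Candidate);   // the real work
        return true;
      }
      return false;
    }

    // After: bail out early on failure; the main path loses a nesting level.
    bool fillSlotEarlyReturn(std::vector<int> &Slots, int Candidate) {
      if (Candidate < 0)
        return false;

      Slots.push_back(Candidate);     // same work, now at the top level
      return true;
    }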
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 0002a5f..c417bd5 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -69,6 +69,12 @@ bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
return false;
}
+bool MipsDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
llvm_unreachable("Unimplemented function.");
@@ -81,12 +87,83 @@ bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
return false;
}
+bool MipsDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Alias) {
llvm_unreachable("Unimplemented function.");
return false;
}
+bool MipsDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
@@ -98,6 +175,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ Node->setNodeId(-1);
return NULL;
}
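The one functional change in the Select hunk above is the added Node->setNodeId(-1) for nodes that already carry a machine opcode; in this version of the SelectionDAG ISel interface, returning NULL means keep the node as is, and the negative id marks it as already selected so the selection machinery does not revisit it. A rough sketch of that guard in isolation (sketch only, not the full Mips selector):

    // Sketch: assumes the SDNode *Select(SDNode *) hook used by this LLVM
    // version, where returning NULL keeps the node unchanged.
    SDNode *MyDAGToDAGISel::Select(SDNode *Node) {
      if (Node->isMachineOpcode()) {
        Node->setNodeId(-1);    // mark as selected so it is not revisited
        return NULL;            // NULL == "use Node as is"
      }
      // ... target-specific selection would go here ...
      return SelectCode(Node);  // fall back to the TableGen-generated matcher
    }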
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index cf0f9c5..a4d9da5 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -57,6 +57,11 @@ private:
virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ // Complex Pattern.
+ /// (reg + reg).
+ virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
/// Fall back on this function if all else fails.
virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
@@ -65,9 +70,42 @@ private:
virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Alias);
+ /// \brief Select constant vector splats.
+ virtual bool selectVSplat(SDNode *N, APInt &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm1.
+ virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm2.
+ virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm3.
+ virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm4.
+ virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm5.
+ virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm6.
+ virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm8.
+ virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a simm5.
+ virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a power of 2.
+ virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is the inverse of a
+ /// power of 2.
+ virtual bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a run of set bits
+ /// ending at the most significant bit
+ virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a run of set bits
+ /// starting at bit zero.
+ virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
+
virtual SDNode *Select(SDNode *N);
virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
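The header hunk only declares the splat-selection hooks; their bodies in MipsDAGToDAGISel are llvm_unreachable stubs, so the real implementations live in a subclass outside this diff. As a purely hypothetical illustration of what an override of one unsigned-immediate hook could look like (not code from this commit; it assumes the splat constant has already been reduced to a ConstantSDNode):

    #include "llvm/Support/MathExtras.h"   // for isUInt<N>

    // Hypothetical override: succeed only when the value fits in an
    // unsigned 5-bit field, and hand back the target constant the
    // instruction encoding expects.
    bool MyDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
      if (!C || !isUInt<5>(C->getZExtValue()))
        return false;

      Imm = CurDAG->getTargetConstant(C->getZExtValue(), N.getValueType());
      return true;
    }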
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index a62e84f..1e8250c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -20,6 +20,7 @@
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -34,6 +35,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cctype>
using namespace llvm;
@@ -67,7 +69,7 @@ static const uint16_t Mips64DPRegs[8] = {
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
if (!isShiftedMask_64(I))
- return false;
+ return false;
Size = CountPopulation_64(I);
Pos = countTrailingZeros(I);
@@ -79,72 +81,35 @@ SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
-static SDValue getTargetNode(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
- EVT Ty = Op.getValueType();
+SDValue MipsTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+}
- if (GlobalAddressSDNode *N = dyn_cast<GlobalAddressSDNode>(Op))
- return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(Op), Ty, 0,
- Flag);
- if (ExternalSymbolSDNode *N = dyn_cast<ExternalSymbolSDNode>(Op))
- return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
- if (BlockAddressSDNode *N = dyn_cast<BlockAddressSDNode>(Op))
- return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
- if (JumpTableSDNode *N = dyn_cast<JumpTableSDNode>(Op))
- return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
- if (ConstantPoolSDNode *N = dyn_cast<ConstantPoolSDNode>(Op))
- return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
- N->getOffset(), Flag);
-
- llvm_unreachable("Unexpected node type.");
- return SDValue();
+SDValue MipsTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
}
-static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- SDValue Hi = getTargetNode(Op, DAG, MipsII::MO_ABS_HI);
- SDValue Lo = getTargetNode(Op, DAG, MipsII::MO_ABS_LO);
- return DAG.getNode(ISD::ADD, DL, Ty,
- DAG.getNode(MipsISD::Hi, DL, Ty, Hi),
- DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
+SDValue MipsTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
}
-SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
- bool HasMips64) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
- SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
- getTargetNode(Op, DAG, GOTFlag));
- SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
- MachinePointerInfo::getGOT(), false, false, false,
- 0);
- unsigned LoFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
- SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(Op, DAG, LoFlag));
- return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
-}
-
-SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+SDValue MipsTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
unsigned Flag) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
- getTargetNode(Op, DAG, Flag));
- return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
}
-SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag,
- unsigned LoFlag) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
- Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
- SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
- getTargetNode(Op, DAG, LoFlag));
- return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+SDValue MipsTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ N->getOffset(), Flag);
}
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
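The getTargetNode hunk above trades a single static helper that probed the node with a chain of dyn_casts (and ended in llvm_unreachable) for one member overload per SDNode kind, so each caller binds the right variant at compile time. A minimal self-contained sketch of that overload-per-type shape, with toy types rather than the SelectionDAG ones:

    #include <cstdio>

    struct GlobalNode { const char *Name; };
    struct SymbolNode { const char *Sym; };

    // One overload per node kind: the compiler picks the right body, so no
    // run-time type test and no unreachable fallback are needed.
    void emitTargetNode(const GlobalNode &N) { std::printf("global %s\n", N.Name); }
    void emitTargetNode(const SymbolNode &N) { std::printf("symbol %s\n", N.Sym); }

    int main() {
      GlobalNode G = { "foo" };
      SymbolNode S = { "memcpy" };
      emitTargetNode(G);   // resolved statically
      emitTargetNode(S);
      return 0;
    }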
@@ -162,8 +127,9 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
case MipsISD::TruncIntFP: return "MipsISD::TruncIntFP";
- case MipsISD::ExtractLOHI: return "MipsISD::ExtractLOHI";
- case MipsISD::InsertLOHI: return "MipsISD::InsertLOHI";
+ case MipsISD::MFHI: return "MipsISD::MFHI";
+ case MipsISD::MFLO: return "MipsISD::MFLO";
+ case MipsISD::MTLOHI: return "MipsISD::MTLOHI";
case MipsISD::Mult: return "MipsISD::Mult";
case MipsISD::Multu: return "MipsISD::Multu";
case MipsISD::MAdd: return "MipsISD::MAdd";
@@ -207,6 +173,30 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP";
case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP";
case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP";
+ case MipsISD::VALL_ZERO: return "MipsISD::VALL_ZERO";
+ case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO";
+ case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO";
+ case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO";
+ case MipsISD::VCEQ: return "MipsISD::VCEQ";
+ case MipsISD::VCLE_S: return "MipsISD::VCLE_S";
+ case MipsISD::VCLE_U: return "MipsISD::VCLE_U";
+ case MipsISD::VCLT_S: return "MipsISD::VCLT_S";
+ case MipsISD::VCLT_U: return "MipsISD::VCLT_U";
+ case MipsISD::VSMAX: return "MipsISD::VSMAX";
+ case MipsISD::VSMIN: return "MipsISD::VSMIN";
+ case MipsISD::VUMAX: return "MipsISD::VUMAX";
+ case MipsISD::VUMIN: return "MipsISD::VUMIN";
+ case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT";
+ case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT";
+ case MipsISD::VNOR: return "MipsISD::VNOR";
+ case MipsISD::VSHF: return "MipsISD::VSHF";
+ case MipsISD::SHF: return "MipsISD::SHF";
+ case MipsISD::ILVEV: return "MipsISD::ILVEV";
+ case MipsISD::ILVOD: return "MipsISD::ILVOD";
+ case MipsISD::ILVL: return "MipsISD::ILVL";
+ case MipsISD::ILVR: return "MipsISD::ILVR";
+ case MipsISD::PCKEV: return "MipsISD::PCKEV";
+ case MipsISD::PCKOD: return "MipsISD::PCKOD";
default: return NULL;
}
}
@@ -424,8 +414,8 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT Ty = N->getValueType(0);
- unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
- unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
+ unsigned LO = (Ty == MVT::i32) ? Mips::LO0 : Mips::LO0_64;
+ unsigned HI = (Ty == MVT::i32) ? Mips::HI0 : Mips::HI0_64;
unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 :
MipsISD::DivRemU16;
SDLoc DL(N);
@@ -566,7 +556,7 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
// Pattern match EXT.
// $dst = and ((sra or srl) $src , pos), (2**size - 1)
// => ext $dst, $src, size, pos
- if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasExtractInsert())
return SDValue();
SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
@@ -607,7 +597,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
// $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
// where mask1 = (2**size - 1) << pos, mask0 = ~mask1
// => ins $dst, $src, size, pos, $src1
- if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasExtractInsert())
return SDValue();
SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
@@ -779,13 +769,17 @@ static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI,
// Insert instruction "teq $divisor_reg, $zero, 7".
MachineBasicBlock::iterator I(MI);
MachineInstrBuilder MIB;
+ MachineOperand &Divisor = MI->getOperand(2);
MIB = BuildMI(MBB, llvm::next(I), MI->getDebugLoc(), TII.get(Mips::TEQ))
- .addOperand(MI->getOperand(2)).addReg(Mips::ZERO).addImm(7);
+ .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill()))
+ .addReg(Mips::ZERO).addImm(7);
// Use the 32-bit sub-register if this is a 64-bit division.
if (Is64Bit)
MIB->getOperand(0).setSubReg(Mips::sub_32);
+ // Clear Divisor's kill flag.
+ Divisor.setIsKill(false);
return &MBB;
}
@@ -796,107 +790,75 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
default:
llvm_unreachable("Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
- case Mips::ATOMIC_LOAD_ADD_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I16:
- case Mips::ATOMIC_LOAD_ADD_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I32:
- case Mips::ATOMIC_LOAD_ADD_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I64:
- case Mips::ATOMIC_LOAD_ADD_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::DADDu);
case Mips::ATOMIC_LOAD_AND_I8:
- case Mips::ATOMIC_LOAD_AND_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I16:
- case Mips::ATOMIC_LOAD_AND_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I32:
- case Mips::ATOMIC_LOAD_AND_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I64:
- case Mips::ATOMIC_LOAD_AND_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::AND64);
case Mips::ATOMIC_LOAD_OR_I8:
- case Mips::ATOMIC_LOAD_OR_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I16:
- case Mips::ATOMIC_LOAD_OR_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I32:
- case Mips::ATOMIC_LOAD_OR_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I64:
- case Mips::ATOMIC_LOAD_OR_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::OR64);
case Mips::ATOMIC_LOAD_XOR_I8:
- case Mips::ATOMIC_LOAD_XOR_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I16:
- case Mips::ATOMIC_LOAD_XOR_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I32:
- case Mips::ATOMIC_LOAD_XOR_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I64:
- case Mips::ATOMIC_LOAD_XOR_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::XOR64);
case Mips::ATOMIC_LOAD_NAND_I8:
- case Mips::ATOMIC_LOAD_NAND_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, 0, true);
case Mips::ATOMIC_LOAD_NAND_I16:
- case Mips::ATOMIC_LOAD_NAND_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, 0, true);
case Mips::ATOMIC_LOAD_NAND_I32:
- case Mips::ATOMIC_LOAD_NAND_I32_P8:
return emitAtomicBinary(MI, BB, 4, 0, true);
case Mips::ATOMIC_LOAD_NAND_I64:
- case Mips::ATOMIC_LOAD_NAND_I64_P8:
return emitAtomicBinary(MI, BB, 8, 0, true);
case Mips::ATOMIC_LOAD_SUB_I8:
- case Mips::ATOMIC_LOAD_SUB_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I16:
- case Mips::ATOMIC_LOAD_SUB_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I32:
- case Mips::ATOMIC_LOAD_SUB_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I64:
- case Mips::ATOMIC_LOAD_SUB_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::DSUBu);
case Mips::ATOMIC_SWAP_I8:
- case Mips::ATOMIC_SWAP_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, 0);
case Mips::ATOMIC_SWAP_I16:
- case Mips::ATOMIC_SWAP_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, 0);
case Mips::ATOMIC_SWAP_I32:
- case Mips::ATOMIC_SWAP_I32_P8:
return emitAtomicBinary(MI, BB, 4, 0);
case Mips::ATOMIC_SWAP_I64:
- case Mips::ATOMIC_SWAP_I64_P8:
return emitAtomicBinary(MI, BB, 8, 0);
case Mips::ATOMIC_CMP_SWAP_I8:
- case Mips::ATOMIC_CMP_SWAP_I8_P8:
return emitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16:
- case Mips::ATOMIC_CMP_SWAP_I16_P8:
return emitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
- case Mips::ATOMIC_CMP_SWAP_I32_P8:
return emitAtomicCmpSwap(MI, BB, 4);
case Mips::ATOMIC_CMP_SWAP_I64:
- case Mips::ATOMIC_CMP_SWAP_I64_P8:
return emitAtomicCmpSwap(MI, BB, 8);
case Mips::PseudoSDIV:
case Mips::PseudoUDIV:
@@ -923,16 +885,16 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned LL, SC, AND, NOR, ZERO, BEQ;
if (Size == 4) {
- LL = IsN64 ? Mips::LL_P8 : Mips::LL;
- SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ LL = Mips::LL;
+ SC = Mips::SC;
AND = Mips::AND;
NOR = Mips::NOR;
ZERO = Mips::ZERO;
BEQ = Mips::BEQ;
}
else {
- LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
- SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ LL = Mips::LLD;
+ SC = Mips::SCD;
AND = Mips::AND64;
NOR = Mips::NOR64;
ZERO = Mips::ZERO_64;
@@ -958,8 +920,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
@@ -990,7 +951,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
- MI->eraseFromParent(); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
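With the separate _P8 opcodes removed, emitAtomicBinary picks LL/SC (or LLD/SCD in the 64-bit case) directly and, as before, wraps the operation in a load-linked / store-conditional retry loop: ll, the binary op (or nor for nand), sc, then beq back to the loop head while the conditional store fails. A small self-contained model of what that loop computes for a 32-bit atomic add, using std::atomic as a stand-in for the ll/sc pair (conceptual only, not the code the expansion emits):

    #include <atomic>

    // Conceptual model of the expanded pseudo: retry until the conditional
    // store succeeds, and return the value that was in memory beforehand.
    // compare_exchange_weak failing plays the role of sc writing 0 when the
    // reservation established by ll was lost.
    int atomicLoadAdd32(std::atomic<int> &Mem, int Incr) {
      int OldVal = Mem.load();
      while (!Mem.compare_exchange_weak(OldVal, OldVal + Incr)) {
        // OldVal has been refreshed with the current contents; retry.
      }
      return OldVal;
    }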
@@ -1001,15 +962,13 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
assert((Size == 1 || Size == 2) &&
- "Unsupported size for EmitAtomicBinaryPartial.");
+ "Unsupported size for EmitAtomicBinaryPartial.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
- unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -1106,7 +1065,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
// beq success,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
// and andres, oldval, incr2
// nor binopres, $0, andres
@@ -1120,7 +1079,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
// and newval, binopres, mask
BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
- } else {// atomic.swap
+ } else { // atomic.swap
// and newval, incr2, mask
BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
@@ -1129,7 +1088,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal0).addReg(NewVal);
- BuildMI(BB, DL, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(Mips::SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
@@ -1151,15 +1110,14 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
.addReg(SllRes).addImm(ShiftImm);
- MI->eraseFromParent(); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
-MachineBasicBlock *
-MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const {
+MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
MachineFunction *MF = BB->getParent();
@@ -1170,15 +1128,14 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
unsigned LL, SC, ZERO, BNE, BEQ;
if (Size == 4) {
- LL = IsN64 ? Mips::LL_P8 : Mips::LL;
- SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ LL = Mips::LL;
+ SC = Mips::SC;
ZERO = Mips::ZERO;
BNE = Mips::BNE;
BEQ = Mips::BEQ;
- }
- else {
- LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
- SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ } else {
+ LL = Mips::LLD;
+ SC = Mips::SCD;
ZERO = Mips::ZERO_64;
BNE = Mips::BNE64;
BEQ = Mips::BEQ64;
@@ -1233,7 +1190,7 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
BuildMI(BB, DL, TII->get(BEQ))
.addReg(Success).addReg(ZERO).addMBB(loop1MBB);
- MI->eraseFromParent(); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
@@ -1250,8 +1207,6 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
- unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -1348,7 +1303,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
// and maskedoldval0,oldval,mask
// bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask);
BuildMI(BB, DL, TII->get(Mips::BNE))
@@ -1364,7 +1319,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal1).addReg(ShiftedNewVal);
- BuildMI(BB, DL, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(Mips::SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
@@ -1421,9 +1376,7 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr);
}
-SDValue MipsTargetLowering::
-lowerBRCOND(SDValue Op, SelectionDAG &DAG) const
-{
+SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// The first operand is the chain, the second is the condition, the third is
// the block to branch to if the condition is true.
SDValue Chain = Op.getOperand(0);
@@ -1489,7 +1442,9 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
SDLoc DL(Op);
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ EVT Ty = Op.getValueType();
+ GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = N->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
const MipsTargetObjectFile &TLOF =
@@ -1506,26 +1461,31 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
}
// %hi/%lo relocation
- return getAddrNonPIC(Op, DAG);
+ return getAddrNonPIC(N, Ty, DAG);
}
if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
- return getAddrLocal(Op, DAG, HasMips64);
+ return getAddrLocal(N, Ty, DAG, HasMips64);
if (LargeGOT)
- return getAddrGlobalLargeGOT(Op, DAG, MipsII::MO_GOT_HI16,
- MipsII::MO_GOT_LO16);
+ return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
+ MipsII::MO_GOT_LO16, DAG.getEntryNode(),
+ MachinePointerInfo::getGOT());
- return getAddrGlobal(Op, DAG,
- HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16);
+ return getAddrGlobal(N, Ty, DAG,
+ HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16,
+ DAG.getEntryNode(), MachinePointerInfo::getGOT());
}
SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
+ BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
+ EVT Ty = Op.getValueType();
+
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
- return getAddrNonPIC(Op, DAG);
+ return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(Op, DAG, HasMips64);
+ return getAddrLocal(N, Ty, DAG, HasMips64);
}
SDValue MipsTargetLowering::
@@ -1612,10 +1572,13 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
+ JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+ EVT Ty = Op.getValueType();
+
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
- return getAddrNonPIC(Op, DAG);
+ return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(Op, DAG, HasMips64);
+ return getAddrLocal(N, Ty, DAG, HasMips64);
}
SDValue MipsTargetLowering::
@@ -1630,11 +1593,13 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ EVT Ty = Op.getValueType();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
- return getAddrNonPIC(Op, DAG);
+ return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(Op, DAG, HasMips64);
+ return getAddrLocal(N, Ty, DAG, HasMips64);
}
SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -1652,7 +1617,8 @@ SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV), false, false, 0);
}
-static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
EVT TyX = Op.getOperand(0).getValueType();
EVT TyY = Op.getOperand(1).getValueType();
SDValue Const1 = DAG.getConstant(1, MVT::i32);
@@ -1671,7 +1637,7 @@ static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1),
Const1);
- if (HasR2) {
+ if (HasExtractInsert) {
// ext E, Y, 31, 1 ; extract bit31 of Y
// ins X, E, 31, 1 ; insert extracted bit at bit31 of X
SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1);
@@ -1697,7 +1663,8 @@ static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
@@ -1708,7 +1675,7 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0));
SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1));
- if (HasR2) {
+ if (HasExtractInsert) {
// ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y
// ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X
SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y,
@@ -1748,12 +1715,13 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue
MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64())
- return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasExtractInsert());
- return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasExtractInsert());
}
-static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
SDLoc DL(Op);
@@ -1765,7 +1733,7 @@ static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
Const1);
// Clear MSB.
- if (HasR2)
+ if (HasExtractInsert)
Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
DAG.getRegister(Mips::ZERO, MVT::i32),
DAG.getConstant(31, MVT::i32), Const1, X);
@@ -1782,7 +1750,8 @@ static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
SDLoc DL(Op);
@@ -1790,7 +1759,7 @@ static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
// Clear MSB.
- if (HasR2)
+ if (HasExtractInsert)
Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
DAG.getRegister(Mips::ZERO_64, MVT::i64),
DAG.getConstant(63, MVT::i32), Const1, X);
@@ -1805,9 +1774,9 @@ static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue
MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
- return lowerFABS64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS64(Op, DAG, Subtarget->hasExtractInsert());
- return lowerFABS32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS32(Op, DAG, Subtarget->hasExtractInsert());
}
SDValue MipsTargetLowering::
@@ -2152,21 +2121,14 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
//===----------------------------------------------------------------------===//
-static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State, const uint16_t *F64Regs) {
- static const unsigned IntRegsSize=4, FloatRegsSize=2;
+ static const unsigned IntRegsSize = 4, FloatRegsSize = 2;
- static const uint16_t IntRegs[] = {
- Mips::A0, Mips::A1, Mips::A2, Mips::A3
- };
- static const uint16_t F32Regs[] = {
- Mips::F12, Mips::F14
- };
- static const uint16_t F64Regs[] = {
- Mips::D6, Mips::D7
- };
+ static const uint16_t IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+ static const uint16_t F32Regs[] = { Mips::F12, Mips::F14 };
// Do not process byval args here.
if (ArgFlags.isByVal())
@@ -2235,14 +2197,28 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
return false;
}
+static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ static const uint16_t F64Regs[] = { Mips::D6, Mips::D7 };
+
+ return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
+}
+
+static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ static const uint16_t F64Regs[] = { Mips::D12_64, Mips::D14_64 };
+
+ return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
+}
+
#include "MipsGenCallingConv.inc"
//===----------------------------------------------------------------------===//
// Call Calling Convention Implementation
//===----------------------------------------------------------------------===//
-static const unsigned O32IntRegsSize = 4;
-
// Return next O32 integer argument register.
static unsigned getNextIntArgReg(unsigned Reg) {
assert((Reg == Mips::A0) || (Reg == Mips::A2));
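The hunk above splits the O32 calling-convention helper: CC_MipsO32 now takes its F64 register table as a parameter, and the new CC_MipsO32_FP32 / CC_MipsO32_FP64 wrappers differ only in which static array they pass (D6/D7 for the FP32 register file, D12_64/D14_64 for FP64). A small self-contained sketch of that parameterisation pattern, with placeholder numbers instead of the real Mips register enums:

    #include <cstdint>

    // Shared worker: identical allocation logic, only the double-precision
    // register file differs between the two ABI flavours.
    static uint16_t allocateF64Arg(unsigned ArgNo, const uint16_t *F64Regs,
                                   unsigned NumRegs) {
      return ArgNo < NumRegs ? F64Regs[ArgNo] : 0;   // 0 == pass on the stack
    }

    static uint16_t allocateF64ArgFP32(unsigned ArgNo) {
      static const uint16_t F64Regs[] = { 6, 7 };    // stand-ins for D6, D7
      return allocateF64Arg(ArgNo, F64Regs, 2);
    }

    static uint16_t allocateF64ArgFP64(unsigned ArgNo) {
      static const uint16_t F64Regs[] = { 12, 14 };  // stand-ins for D12_64, D14_64
      return allocateF64Arg(ArgNo, F64Regs, 2);
    }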
@@ -2339,6 +2315,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
// Analyze operands of the call, assigning locations to each operand.
@@ -2347,10 +2324,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getTargetMachine(), ArgLocs, *DAG.getContext());
MipsCC::SpecialCallingConvType SpecialCallingConv =
getSpecialCallingConv(Callee);
- MipsCC MipsCCInfo(CallConv, IsO32, CCInfo, SpecialCallingConv);
+ MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo,
+ SpecialCallingConv);
MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
- getTargetMachine().Options.UseSoftFloat,
+ Subtarget->mipsSEUsesSoftFloat(),
Callee.getNode(), CLI.Args);
// Get a count of how many bytes are to be pushed on the stack.
@@ -2467,32 +2445,40 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
bool GlobalOrExternal = false, InternalLinkage = false;
SDValue CalleeLo;
+ EVT Ty = Callee.getValueType();
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPICCall) {
- InternalLinkage = G->getGlobal()->hasInternalLinkage();
+ const GlobalValue *Val = G->getGlobal();
+ InternalLinkage = Val->hasInternalLinkage();
if (InternalLinkage)
- Callee = getAddrLocal(Callee, DAG, HasMips64);
+ Callee = getAddrLocal(G, Ty, DAG, HasMips64);
else if (LargeGOT)
- Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
- MipsII::MO_CALL_LO16);
+ Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
+ MipsII::MO_CALL_LO16, Chain,
+ FuncInfo->callPtrInfo(Val));
else
- Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
+ Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
+ FuncInfo->callPtrInfo(Val));
} else
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
MipsII::MO_NO_FLAG);
GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
if (!IsN64 && !IsPIC) // !N64 && static
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
MipsII::MO_NO_FLAG);
else if (LargeGOT)
- Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
- MipsII::MO_CALL_LO16);
+ Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16,
+ MipsII::MO_CALL_LO16, Chain,
+ FuncInfo->callPtrInfo(Sym));
else // N64 || PIC
- Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
+ Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
+ FuncInfo->callPtrInfo(Sym));
GlobalOrExternal = true;
}
@@ -2534,9 +2520,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo);
- MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat,
+ MipsCCInfo.analyzeCallResult(Ins, Subtarget->mipsSEUsesSoftFloat(),
CallNode, RetTy);
// Copy all of the result registers out of their specified physreg.
@@ -2581,10 +2567,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo);
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
- bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat;
+ bool UseSoftFloat = Subtarget->mipsSEUsesSoftFloat();
MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
@@ -2613,21 +2599,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Arguments stored on registers
if (IsRegLoc) {
- EVT RegVT = VA.getLocVT();
+ MVT RegVT = VA.getLocVT();
unsigned ArgReg = VA.getLocReg();
- const TargetRegisterClass *RC;
-
- if (RegVT == MVT::i32)
- RC = Subtarget->inMips16Mode()? &Mips::CPU16RegsRegClass :
- &Mips::GPR32RegClass;
- else if (RegVT == MVT::i64)
- RC = &Mips::GPR64RegClass;
- else if (RegVT == MVT::f32)
- RC = &Mips::FGR32RegClass;
- else if (RegVT == MVT::f64)
- RC = HasMips64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
- else
- llvm_unreachable("RegVT not supported by FormalArguments Lowering");
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
// Transform the arguments stored on
// physical registers into virtual ones
@@ -2677,9 +2651,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(ValVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+ SDValue Load = DAG.getLoad(ValVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ InVals.push_back(Load);
+ OutChains.push_back(Load.getValue(1));
}
}
@@ -2740,10 +2716,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
*DAG.getContext());
- MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo);
// Analyze return values.
- MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat,
+ MipsCCInfo.analyzeReturn(Outs, Subtarget->mipsSEUsesSoftFloat(),
MF.getFunction()->getReturnType());
SDValue Flag;
@@ -2802,7 +2778,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
MipsTargetLowering::ConstraintType MipsTargetLowering::
getConstraintType(const std::string &Constraint) const
{
- // Mips specific constrainy
+ // Mips specific constraints
// GCC config/mips/constraints.md
//
// 'd' : An address register. Equivalent to r
@@ -2853,16 +2829,19 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
if (type->isIntegerTy())
weight = CW_Register;
break;
- case 'f':
- if (type->isFloatTy())
+ case 'f': // FPU or MSA register
+ if (Subtarget->hasMSA() && type->isVectorTy() &&
+ cast<VectorType>(type)->getBitWidth() == 128)
+ weight = CW_Register;
+ else if (type->isFloatTy())
weight = CW_Register;
break;
case 'c': // $25 for indirect jumps
case 'l': // lo register
case 'x': // hilo register pair
- if (type->isIntegerTy())
+ if (type->isIntegerTy())
weight = CW_SpecificReg;
- break;
+ break;
case 'I': // signed 16 bit immediate
case 'J': // integer zero
case 'K': // unsigned 16 bit immediate
@@ -2880,6 +2859,104 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
+/// This is a helper function to parse a physical register string and split it
+/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag
+/// that is returned indicates whether parsing was successful. The second flag
+/// is true if the numeric part exists.
+static std::pair<bool, bool>
+parsePhysicalReg(const StringRef &C, std::string &Prefix,
+ unsigned long long &Reg) {
+ if (C.front() != '{' || C.back() != '}')
+ return std::make_pair(false, false);
+
+ // Search for the first numeric character.
+ StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1;
+ I = std::find_if(B, E, std::ptr_fun(isdigit));
+
+ Prefix.assign(B, I - B);
+
+ // The second flag is set to false if no numeric characters were found.
+ if (I == E)
+ return std::make_pair(true, false);
+
+ // Parse the numeric characters.
+ return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg),
+ true);
+}
+
+std::pair<unsigned, const TargetRegisterClass *> MipsTargetLowering::
+parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC;
+ std::string Prefix;
+ unsigned long long Reg;
+
+ std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg);
+
+ if (!R.first)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+ if ((Prefix == "hi" || Prefix == "lo")) { // Parse hi/lo.
+ // No numeric characters follow "hi" or "lo".
+ if (R.second)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+ RC = TRI->getRegClass(Prefix == "hi" ?
+ Mips::HI32RegClassID : Mips::LO32RegClassID);
+ return std::make_pair(*(RC->begin()), RC);
+ } else if (Prefix.compare(0, 4, "$msa") == 0) {
+ // Parse $msa(ir|csr|access|save|modify|request|map|unmap)
+
+ // No numeric characters follow the name.
+ if (R.second)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass *)0);
+
+ Reg = StringSwitch<unsigned long long>(Prefix)
+ .Case("$msair", Mips::MSAIR)
+ .Case("$msacsr", Mips::MSACSR)
+ .Case("$msaaccess", Mips::MSAAccess)
+ .Case("$msasave", Mips::MSASave)
+ .Case("$msamodify", Mips::MSAModify)
+ .Case("$msarequest", Mips::MSARequest)
+ .Case("$msamap", Mips::MSAMap)
+ .Case("$msaunmap", Mips::MSAUnmap)
+ .Default(0);
+
+ if (!Reg)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass *)0);
+
+ RC = TRI->getRegClass(Mips::MSACtrlRegClassID);
+ return std::make_pair(Reg, RC);
+ }
+
+ if (!R.second)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+ if (Prefix == "$f") { // Parse $f0-$f31.
+ // If the size of FP registers is 64-bit or Reg is an even number, select
+ // the 64-bit register class. Otherwise, select the 32-bit register class.
+ if (VT == MVT::Other)
+ VT = (Subtarget->isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
+
+ RC = getRegClassFor(VT);
+
+ if (RC == &Mips::AFGR64RegClass) {
+ assert(Reg % 2 == 0);
+ Reg >>= 1;
+ }
+ } else if (Prefix == "$fcc") // Parse $fcc0-$fcc7.
+ RC = TRI->getRegClass(Mips::FCCRegClassID);
+ else if (Prefix == "$w") { // Parse $w0-$w31.
+ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT);
+ } else { // Parse $0-$31.
+ assert(Prefix == "$");
+ RC = getRegClassFor((VT == MVT::Other) ? MVT::i32 : VT);
+ }
+
+ assert(Reg < RC->getNumRegs());
+ return std::make_pair(*(RC->begin() + Reg), RC);
+}
+
/// Given a register class constraint, like 'r', if this corresponds directly
/// to an LLVM register class, return a register of 0 and the register class
/// pointer.
@@ -2902,10 +2979,18 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
return std::make_pair(0U, &Mips::GPR64RegClass);
// This will generate an error message
return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
- case 'f':
- if (VT == MVT::f32)
+ case 'f': // FPU or MSA register
+ if (VT == MVT::v16i8)
+ return std::make_pair(0U, &Mips::MSA128BRegClass);
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
+ return std::make_pair(0U, &Mips::MSA128HRegClass);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return std::make_pair(0U, &Mips::MSA128WRegClass);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return std::make_pair(0U, &Mips::MSA128DRegClass);
+ else if (VT == MVT::f32)
return std::make_pair(0U, &Mips::FGR32RegClass);
- if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
+ else if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
if (Subtarget->isFP64bit())
return std::make_pair(0U, &Mips::FGR64RegClass);
return std::make_pair(0U, &Mips::AFGR64RegClass);
@@ -2918,14 +3003,21 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
case 'l': // register suitable for indirect jump
if (VT == MVT::i32)
- return std::make_pair((unsigned)Mips::LO, &Mips::LORegsRegClass);
- return std::make_pair((unsigned)Mips::LO64, &Mips::LORegs64RegClass);
+ return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass);
+ return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass);
case 'x': // register suitable for indirect jump
// FIXME: Not triggering the use of both hi and lo
// This will generate an error message
return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
}
+
+ std::pair<unsigned, const TargetRegisterClass *> R;
+ R = parseRegForInlineAsmConstraint(Constraint, VT);
+
+ if (R.second)
+ return R;
+
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
@@ -3024,8 +3116,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-bool
-MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
+bool MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
@@ -3089,13 +3181,13 @@ static bool isF128SoftLibCall(const char *CallSym) {
"log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl",
"truncl"};
- const char * const *End = LibCalls + array_lengthof(LibCalls);
+ const char *const *End = LibCalls + array_lengthof(LibCalls);
// Check that LibCalls is sorted alphabetically.
MipsTargetLowering::LTStr Comp;
#ifndef NDEBUG
- for (const char * const *I = LibCalls; I < End - 1; ++I)
+ for (const char *const *I = LibCalls; I < End - 1; ++I)
assert(Comp(*I, *(I + 1)));
#endif
@@ -3133,9 +3225,9 @@ MipsTargetLowering::MipsCC::SpecialCallingConvType
}
MipsTargetLowering::MipsCC::MipsCC(
- CallingConv::ID CC, bool IsO32_, CCState &Info,
- MipsCC::SpecialCallingConvType SpecialCallingConv_)
- : CCInfo(Info), CallConv(CC), IsO32(IsO32_),
+ CallingConv::ID CC, bool IsO32_, bool IsFP64_, CCState &Info,
+ MipsCC::SpecialCallingConvType SpecialCallingConv_)
+ : CCInfo(Info), CallConv(CC), IsO32(IsO32_), IsFP64(IsFP64_),
SpecialCallingConv(SpecialCallingConv_){
// Pre-allocate reserved argument area.
CCInfo.AllocateStack(reservedArgArea(), 1);
@@ -3249,11 +3341,10 @@ analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
analyzeReturn(Outs, IsSoftFloat, 0, RetTy);
}
-void
-MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
- MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags) {
+void MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
+ MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags) {
assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0.");
struct ByValArgInfo ByVal;
@@ -3291,11 +3382,11 @@ llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const {
if (SpecialCallingConv == Mips16RetHelperConv)
return CC_Mips16RetHelper;
- return IsO32 ? CC_MipsO32 : CC_MipsN;
+ return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN;
}
llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
- return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg;
+ return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN_VarArg;
}
const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const {
@@ -3473,17 +3564,15 @@ passByValArg(SDValue Chain, SDLoc DL,
DAG.getConstant(Offset, PtrTy));
SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
DAG.getIntPtrConstant(ByVal.Address));
- Chain = DAG.getMemcpy(Chain, DL, Dst, Src,
- DAG.getConstant(MemCpySize, PtrTy), Alignment,
- /*isVolatile=*/false, /*AlwaysInline=*/false,
+ Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
+ Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
MachinePointerInfo(0), MachinePointerInfo(0));
MemOpChains.push_back(Chain);
}
-void
-MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
- const MipsCC &CC, SDValue Chain,
- SDLoc DL, SelectionDAG &DAG) const {
+void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
+ const MipsCC &CC, SDValue Chain,
+ SDLoc DL, SelectionDAG &DAG) const {
unsigned NumRegs = CC.numIntArgRegs();
const uint16_t *ArgRegs = CC.intArgRegs();
const CCState &CCInfo = CC.getCCInfo();
@@ -3501,8 +3590,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
if (NumRegs == Idx)
VaArgOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), RegSize);
else
- VaArgOffset =
- (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx));
+ VaArgOffset = (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx));
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 123a2a6..65f68f0 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -17,6 +17,7 @@
#include "Mips.h"
#include "MipsSubtarget.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
@@ -69,10 +70,11 @@ namespace llvm {
EH_RETURN,
// Node used to extract integer from accumulator.
- ExtractLOHI,
+ MFHI,
+ MFLO,
// Node used to insert integers to accumulator.
- InsertLOHI,
+ MTLOHI,
// Mult nodes.
Mult,
@@ -152,6 +154,43 @@ namespace llvm {
SETCC_DSP,
SELECT_CC_DSP,
+ // Vector comparisons.
+ // These take a vector and return a boolean.
+ VALL_ZERO,
+ VANY_ZERO,
+ VALL_NONZERO,
+ VANY_NONZERO,
+
+ // These take a vector and return a vector bitmask.
+ VCEQ,
+ VCLE_S,
+ VCLE_U,
+ VCLT_S,
+ VCLT_U,
+
+ // Element-wise vector max/min.
+ VSMAX,
+ VSMIN,
+ VUMAX,
+ VUMIN,
+
+ // Vector Shuffle with mask as an operand
+ VSHF, // Generic shuffle
+ SHF, // 4-element set shuffle.
+ ILVEV, // Interleave even elements
+ ILVOD, // Interleave odd elements
+ ILVL, // Interleave left elements
+ ILVR, // Interleave right elements
+ PCKEV, // Pack even elements
+ PCKOD, // Pack odd elements
+
+ // Combined (XOR (OR $a, $b), -1)
+ VNOR,
+
+ // Extended vector element extraction
+ VEXTRACT_SEXT_ELT,
+ VEXTRACT_ZEXT_ELT,
+
// Load/Store Left/Right nodes.
LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
LWR,
@@ -211,12 +250,72 @@ namespace llvm {
protected:
SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
- SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
-
- SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
-
- SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag, unsigned LoFlag) const;
+ // This method creates the following nodes, which are necessary for
+ // computing a local symbol's address:
+ //
+ // (add (load (wrapper $gp, %got(sym)), %lo(sym))
+ template<class NodeTy>
+ SDValue getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
+ bool HasMips64) const {
+ SDLoc DL(N);
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
+ getTargetNode(N, Ty, DAG, GOTFlag));
+ SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
+ MachinePointerInfo::getGOT(), false, false,
+ false, 0);
+ unsigned LoFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty,
+ getTargetNode(N, Ty, DAG, LoFlag));
+ return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
+ }
+
+ // This method creates the following nodes, which are necessary for
+ // computing a global symbol's address:
+ //
+ // (load (wrapper $gp, %got(sym)))
+ template<class NodeTy>
+ SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag, SDValue Chain,
+ const MachinePointerInfo &PtrInfo) const {
+ SDLoc DL(N);
+ SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
+ getTargetNode(N, Ty, DAG, Flag));
+ return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo, false, false, false, 0);
+ }
+
+ // This method creates the following nodes, which are necessary for
+ // computing a global symbol's address in large-GOT mode:
+ //
+ // (load (wrapper (add %hi(sym), $gp), %lo(sym)))
+ template<class NodeTy>
+ SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag,
+ SDValue Chain,
+ const MachinePointerInfo &PtrInfo) const {
+ SDLoc DL(N);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty,
+ getTargetNode(N, Ty, DAG, HiFlag));
+ Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
+ SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
+ getTargetNode(N, Ty, DAG, LoFlag));
+ return DAG.getLoad(Ty, DL, Chain, Wrapper, PtrInfo, false, false, false,
+ 0);
+ }
+
+ // This method creates the following nodes, which are necessary for
+ // computing a symbol's address in non-PIC mode:
+ //
+ // (add %hi(sym), %lo(sym))
+ template<class NodeTy>
+ SDValue getAddrNonPIC(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI);
+ SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO);
+ return DAG.getNode(ISD::ADD, DL, Ty,
+ DAG.getNode(MipsISD::Hi, DL, Ty, Hi),
+ DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
+ }
/// This function fills Ops, which is the list of operands that will later
/// be used when a function call node is created. It also generates
@@ -244,9 +343,8 @@ namespace llvm {
Mips16RetHelperConv, NoSpecialCallingConv
};
- MipsCC(
- CallingConv::ID CallConv, bool IsO32, CCState &Info,
- SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv);
+ MipsCC(CallingConv::ID CallConv, bool IsO32, bool IsFP64, CCState &Info,
+ SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv);
void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -319,17 +417,39 @@ namespace llvm {
CCState &CCInfo;
CallingConv::ID CallConv;
- bool IsO32;
+ bool IsO32, IsFP64;
SpecialCallingConvType SpecialCallingConv;
SmallVector<ByValArgInfo, 2> ByValArgs;
};
protected:
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
// Subtarget Info
const MipsSubtarget *Subtarget;
bool HasMips64, IsN64, IsO32;
private:
+ // Create a TargetGlobalAddress node.
+ SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+
+ // Create a TargetExternalSymbol node.
+ SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+
+ // Create a TargetBlockAddress node.
+ SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+
+ // Create a TargetJumpTable node.
+ SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+
+ // Create a TargetConstantPool node.
+ SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
MipsCC::SpecialCallingConvType getSpecialCallingConv(SDValue Callee) const;
// Lower Operand helpers
@@ -361,8 +481,6 @@ namespace llvm {
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
bool IsSRA) const;
- SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
@@ -433,6 +551,11 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const;
+ /// This function parses registers that appear in inline-asm constraints.
+ /// It returns pair (0, 0) on failure.
+ std::pair<unsigned, const TargetRegisterClass *>
+ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index b992e77..9f7ce9a 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -99,9 +99,9 @@ class ADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin, bit IsComm,
multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
SDPatternOperator OpNode = null_frag> {
- def _D32 : ADDS_FT<opstr, AFGR64RegsOpnd, Itin, IsComm, OpNode>,
+ def _D32 : ADDS_FT<opstr, AFGR64Opnd, Itin, IsComm, OpNode>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : ADDS_FT<opstr, FGR64RegsOpnd, Itin, IsComm, OpNode>,
+ def _D64 : ADDS_FT<opstr, FGR64Opnd, Itin, IsComm, OpNode>,
Requires<[IsFP64bit, HasStdEnc]> {
string DecoderNamespace = "Mips64";
}
@@ -115,18 +115,18 @@ class ABSS_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
- def _D32 : ABSS_FT<opstr, AFGR64RegsOpnd, AFGR64RegsOpnd, Itin, OpNode>,
+ def _D32 : ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : ABSS_FT<opstr, FGR64RegsOpnd, FGR64RegsOpnd, Itin, OpNode>,
+ def _D64 : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
Requires<[IsFP64bit, HasStdEnc]> {
string DecoderNamespace = "Mips64";
}
}
multiclass ROUND_M<string opstr, InstrItinClass Itin> {
- def _D32 : ABSS_FT<opstr, FGR32RegsOpnd, AFGR64RegsOpnd, Itin>,
+ def _D32 : ABSS_FT<opstr, FGR32Opnd, AFGR64Opnd, Itin>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : ABSS_FT<opstr, FGR32RegsOpnd, FGR64RegsOpnd, Itin>,
+ def _D64 : ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>,
Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
@@ -143,16 +143,16 @@ class MTC1_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
[(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
class LW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
- Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
- InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> {
let DecoderMethod = "DecodeFMem";
let mayLoad = 1;
}
class SW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
- Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
- InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> {
let DecoderMethod = "DecodeFMem";
let mayStore = 1;
@@ -171,19 +171,19 @@ class NMADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
[(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))],
Itin, FrmFR>;
-class LWXC1_FT<string opstr, RegisterOperand DRC, RegisterOperand PRC,
+class LWXC1_FT<string opstr, RegisterOperand DRC,
InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
- InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index),
+ InstSE<(outs DRC:$fd), (ins PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$fd, ${index}(${base})"),
- [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> {
+ [(set DRC:$fd, (OpNode (add iPTR:$base, iPTR:$index)))], Itin, FrmFI> {
let AddedComplexity = 20;
}
-class SWXC1_FT<string opstr, RegisterOperand DRC, RegisterOperand PRC,
+class SWXC1_FT<string opstr, RegisterOperand DRC,
InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
- InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+ InstSE<(outs), (ins DRC:$fs, PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$fs, ${index}(${base})"),
- [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> {
+ [(OpNode DRC:$fs, (add iPTR:$base, iPTR:$index))], Itin, FrmFI> {
let AddedComplexity = 20;
}
@@ -231,24 +231,24 @@ multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt> {
def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC>, C_COND_FM<fmt, 15>;
}
-defm S : C_COND_M<"s", FGR32RegsOpnd, 16>;
-defm D32 : C_COND_M<"d", AFGR64RegsOpnd, 17>,
+defm S : C_COND_M<"s", FGR32Opnd, 16>;
+defm D32 : C_COND_M<"d", AFGR64Opnd, 17>,
Requires<[NotFP64bit, HasStdEnc]>;
let DecoderNamespace = "Mips64" in
-defm D64 : C_COND_M<"d", FGR64RegsOpnd, 17>, Requires<[IsFP64bit, HasStdEnc]>;
+defm D64 : C_COND_M<"d", FGR64Opnd, 17>, Requires<[IsFP64bit, HasStdEnc]>;
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
-def ROUND_W_S : ABSS_FT<"round.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def ROUND_W_S : ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xc, 16>;
-def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xd, 16>;
-def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xe, 16>;
-def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xf, 16>;
-def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x24, 16>;
defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>;
@@ -258,71 +258,71 @@ defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>;
defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>;
let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
- def ROUND_L_S : ABSS_FT<"round.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x8, 16>;
- def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x8, 17>;
- def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x9, 16>;
- def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x9, 17>;
- def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xa, 16>;
- def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0xa, 17>;
- def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xb, 16>;
- def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0xb, 17>;
}
-def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x20, 20>;
-def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x25, 16>;
-def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x25, 17>;
let Predicates = [NotFP64bit, HasStdEnc] in {
- def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32RegsOpnd, AFGR64RegsOpnd, IIFcvt>,
+ def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, IIFcvt>,
ABSS_FM<0x20, 17>;
- def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x21, 20>;
- def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x21, 16>;
}
let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
- def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x20, 17>;
- def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x20, 21>;
- def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x21, 20>;
- def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x21, 16>;
- def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x21, 21>;
}
let isPseudo = 1, isCodeGenOnly = 1 in {
- def PseudoCVT_S_W : ABSS_FT<"", FGR32RegsOpnd, GPR32Opnd, IIFcvt>;
- def PseudoCVT_D32_W : ABSS_FT<"", AFGR64RegsOpnd, GPR32Opnd, IIFcvt>;
- def PseudoCVT_S_L : ABSS_FT<"", FGR64RegsOpnd, GPR64Opnd, IIFcvt>;
- def PseudoCVT_D64_W : ABSS_FT<"", FGR64RegsOpnd, GPR32Opnd, IIFcvt>;
- def PseudoCVT_D64_L : ABSS_FT<"", FGR64RegsOpnd, GPR64Opnd, IIFcvt>;
+ def PseudoCVT_S_W : ABSS_FT<"", FGR32Opnd, GPR32Opnd, IIFcvt>;
+ def PseudoCVT_D32_W : ABSS_FT<"", AFGR64Opnd, GPR32Opnd, IIFcvt>;
+ def PseudoCVT_S_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, IIFcvt>;
+ def PseudoCVT_D64_W : ABSS_FT<"", FGR64Opnd, GPR32Opnd, IIFcvt>;
+ def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, IIFcvt>;
}
let Predicates = [NoNaNsFPMath, HasStdEnc] in {
- def FABS_S : ABSS_FT<"abs.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt, fabs>,
+ def FABS_S : ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, IIFcvt, fabs>,
ABSS_FM<0x5, 16>;
- def FNEG_S : ABSS_FT<"neg.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt, fneg>,
+ def FNEG_S : ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, IIFcvt, fneg>,
ABSS_FM<0x7, 16>;
defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>;
defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>;
}
-def FSQRT_S : ABSS_FT<"sqrt.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFsqrtSingle,
+def FSQRT_S : ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, IIFsqrtSingle,
fsqrt>, ABSS_FM<0x4, 16>;
defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
@@ -334,164 +334,134 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
/// Move Control Registers From/To CPU Registers
def CFC1 : MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, IIFmove>, MFC1_FM<2>;
def CTC1 : MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, IIFmove>, MFC1_FM<6>;
-def MFC1 : MFC1_FT<"mfc1", GPR32Opnd, FGR32RegsOpnd, IIFmoveC1, bitconvert>,
+def MFC1 : MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, IIFmoveC1, bitconvert>,
MFC1_FM<0>;
-def MTC1 : MTC1_FT<"mtc1", FGR32RegsOpnd, GPR32Opnd, IIFmoveC1, bitconvert>,
+def MTC1 : MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, IIFmoveC1, bitconvert>,
MFC1_FM<4>;
-def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64RegsOpnd, IIFmoveC1,
+def MFHC1 : MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, IIFmoveC1>,
+ MFC1_FM<3>;
+def MTHC1 : MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, IIFmoveC1>,
+ MFC1_FM<7>;
+def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, IIFmoveC1,
bitconvert>, MFC1_FM<1>;
-def DMTC1 : MTC1_FT<"dmtc1", FGR64RegsOpnd, GPR64Opnd, IIFmoveC1,
+def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, IIFmoveC1,
bitconvert>, MFC1_FM<5>;
-def FMOV_S : ABSS_FT<"mov.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFmove>,
+def FMOV_S : ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, IIFmove>,
ABSS_FM<0x6, 16>;
-def FMOV_D32 : ABSS_FT<"mov.d", AFGR64RegsOpnd, AFGR64RegsOpnd, IIFmove>,
+def FMOV_D32 : ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, IIFmove>,
ABSS_FM<0x6, 17>, Requires<[NotFP64bit, HasStdEnc]>;
-def FMOV_D64 : ABSS_FT<"mov.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFmove>,
+def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, IIFmove>,
ABSS_FM<0x6, 17>, Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
/// Floating Point Memory Instructions
-let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
- def LWC1_P8 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem64, load>,
- LW_FM<0x31>;
- def SWC1_P8 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem64, store>,
- LW_FM<0x39>;
- def LDC164_P8 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem64, load>,
- LW_FM<0x35> {
- let isCodeGenOnly =1;
- }
- def SDC164_P8 : SW_FT<"sdc1", FGR64RegsOpnd, IIFStore, mem64, store>,
- LW_FM<0x3d> {
- let isCodeGenOnly =1;
- }
+let Predicates = [HasStdEnc] in {
+ def LWC1 : LW_FT<"lwc1", FGR32Opnd, IIFLoad, load>, LW_FM<0x31>;
+ def SWC1 : SW_FT<"swc1", FGR32Opnd, IIFStore, store>, LW_FM<0x39>;
}
-let Predicates = [NotN64, HasStdEnc] in {
- def LWC1 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem, load>, LW_FM<0x31>;
- def SWC1 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem, store>, LW_FM<0x39>;
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LDC164 : LW_FT<"ldc1", FGR64Opnd, IIFLoad, load>, LW_FM<0x35>;
+ def SDC164 : SW_FT<"sdc1", FGR64Opnd, IIFStore, store>, LW_FM<0x3d>;
}
-let Predicates = [NotN64, HasMips64, HasStdEnc],
- DecoderNamespace = "Mips64" in {
- def LDC164 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem, load>, LW_FM<0x35>;
- def SDC164 : SW_FT<"sdc1", FGR64RegsOpnd, IIFStore, mem, store>, LW_FM<0x3d>;
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM<0x35>;
+ def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>;
}
-let Predicates = [NotN64, NotMips64, HasStdEnc] in {
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def PseudoLDC1 : LW_FT<"", AFGR64RegsOpnd, IIFLoad, mem, load>;
- def PseudoSDC1 : SW_FT<"", AFGR64RegsOpnd, IIFStore, mem, store>;
- }
- def LDC1 : LW_FT<"ldc1", AFGR64RegsOpnd, IIFLoad, mem>, LW_FM<0x35>;
- def SDC1 : SW_FT<"sdc1", AFGR64RegsOpnd, IIFStore, mem>, LW_FM<0x3d>;
+/// Cop2 Memory Instructions
+let Predicates = [HasStdEnc] in {
+ def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>;
+ def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>;
+ def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>;
+ def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>;
}
// Indexed loads and stores.
let Predicates = [HasFPIdx, HasStdEnc] in {
- def LWXC1 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, GPR32Opnd, IIFLoad, load>,
- LWXC1_FM<0>;
- def SWXC1 : SWXC1_FT<"swxc1", FGR32RegsOpnd, GPR32Opnd, IIFStore, store>,
- SWXC1_FM<8>;
-}
-
-let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in {
- def LDXC1 : LWXC1_FT<"ldxc1", AFGR64RegsOpnd, GPR32Opnd, IIFLoad, load>,
- LWXC1_FM<1>;
- def SDXC1 : SWXC1_FT<"sdxc1", AFGR64RegsOpnd, GPR32Opnd, IIFStore, store>,
- SWXC1_FM<9>;
+ def LWXC1 : LWXC1_FT<"lwxc1", FGR32Opnd, IIFLoad, load>, LWXC1_FM<0>;
+ def SWXC1 : SWXC1_FT<"swxc1", FGR32Opnd, IIFStore, store>, SWXC1_FM<8>;
}
-let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in {
- def LDXC164 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, GPR32Opnd, IIFLoad, load>,
- LWXC1_FM<1>;
- def SDXC164 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, GPR32Opnd, IIFStore, store>,
- SWXC1_FM<9>;
+let Predicates = [HasFPIdx, NotFP64bit, HasStdEnc] in {
+ def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, IIFLoad, load>, LWXC1_FM<1>;
+ def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, IIFStore, store>, SWXC1_FM<9>;
}
-// n64
-let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in {
- def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, GPR64Opnd, IIFLoad, load>,
- LWXC1_FM<0>;
- def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, GPR64Opnd, IIFLoad,
- load>, LWXC1_FM<1>;
- def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32RegsOpnd, GPR64Opnd, IIFStore,
- store>, SWXC1_FM<8>;
- def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, GPR64Opnd, IIFStore,
- store>, SWXC1_FM<9>;
+let Predicates = [HasFPIdx, IsFP64bit, HasStdEnc],
+ DecoderNamespace="Mips64" in {
+ def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, IIFLoad, load>, LWXC1_FM<1>;
+ def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, IIFStore, store>, SWXC1_FM<9>;
}
// Load/store doubleword indexed unaligned.
-let Predicates = [NotMips64, HasStdEnc] in {
- def LUXC1 : LWXC1_FT<"luxc1", AFGR64RegsOpnd, GPR32Opnd, IIFLoad>,
- LWXC1_FM<0x5>;
- def SUXC1 : SWXC1_FT<"suxc1", AFGR64RegsOpnd, GPR32Opnd, IIFStore>,
- SWXC1_FM<0xd>;
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def LUXC1 : LWXC1_FT<"luxc1", AFGR64Opnd, IIFLoad>, LWXC1_FM<0x5>;
+ def SUXC1 : SWXC1_FT<"suxc1", AFGR64Opnd, IIFStore>, SWXC1_FM<0xd>;
}
-let Predicates = [HasMips64, HasStdEnc],
- DecoderNamespace="Mips64" in {
- def LUXC164 : LWXC1_FT<"luxc1", FGR64RegsOpnd, GPR32Opnd, IIFLoad>,
- LWXC1_FM<0x5>;
- def SUXC164 : SWXC1_FT<"suxc1", FGR64RegsOpnd, GPR32Opnd, IIFStore>,
- SWXC1_FM<0xd>;
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace="Mips64" in {
+ def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, IIFLoad>, LWXC1_FM<0x5>;
+ def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, IIFStore>, SWXC1_FM<0xd>;
}
/// Floating-point Arithmetic
-def FADD_S : ADDS_FT<"add.s", FGR32RegsOpnd, IIFadd, 1, fadd>,
+def FADD_S : ADDS_FT<"add.s", FGR32Opnd, IIFadd, 1, fadd>,
ADDS_FM<0x00, 16>;
defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>;
-def FDIV_S : ADDS_FT<"div.s", FGR32RegsOpnd, IIFdivSingle, 0, fdiv>,
+def FDIV_S : ADDS_FT<"div.s", FGR32Opnd, IIFdivSingle, 0, fdiv>,
ADDS_FM<0x03, 16>;
defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>;
-def FMUL_S : ADDS_FT<"mul.s", FGR32RegsOpnd, IIFmulSingle, 1, fmul>,
+def FMUL_S : ADDS_FT<"mul.s", FGR32Opnd, IIFmulSingle, 1, fmul>,
ADDS_FM<0x02, 16>;
defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>;
-def FSUB_S : ADDS_FT<"sub.s", FGR32RegsOpnd, IIFadd, 0, fsub>,
+def FSUB_S : ADDS_FT<"sub.s", FGR32Opnd, IIFadd, 0, fsub>,
ADDS_FM<0x01, 16>;
defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>;
let Predicates = [HasMips32r2, HasStdEnc] in {
- def MADD_S : MADDS_FT<"madd.s", FGR32RegsOpnd, IIFmulSingle, fadd>,
+ def MADD_S : MADDS_FT<"madd.s", FGR32Opnd, IIFmulSingle, fadd>,
MADDS_FM<4, 0>;
- def MSUB_S : MADDS_FT<"msub.s", FGR32RegsOpnd, IIFmulSingle, fsub>,
+ def MSUB_S : MADDS_FT<"msub.s", FGR32Opnd, IIFmulSingle, fsub>,
MADDS_FM<5, 0>;
}
let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
- def NMADD_S : NMADDS_FT<"nmadd.s", FGR32RegsOpnd, IIFmulSingle, fadd>,
+ def NMADD_S : NMADDS_FT<"nmadd.s", FGR32Opnd, IIFmulSingle, fadd>,
MADDS_FM<6, 0>;
- def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32RegsOpnd, IIFmulSingle, fsub>,
+ def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32Opnd, IIFmulSingle, fsub>,
MADDS_FM<7, 0>;
}
let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in {
- def MADD_D32 : MADDS_FT<"madd.d", AFGR64RegsOpnd, IIFmulDouble, fadd>,
+ def MADD_D32 : MADDS_FT<"madd.d", AFGR64Opnd, IIFmulDouble, fadd>,
MADDS_FM<4, 1>;
- def MSUB_D32 : MADDS_FT<"msub.d", AFGR64RegsOpnd, IIFmulDouble, fsub>,
+ def MSUB_D32 : MADDS_FT<"msub.d", AFGR64Opnd, IIFmulDouble, fsub>,
MADDS_FM<5, 1>;
}
let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in {
- def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64RegsOpnd, IIFmulDouble, fadd>,
+ def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64Opnd, IIFmulDouble, fadd>,
MADDS_FM<6, 1>;
- def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64RegsOpnd, IIFmulDouble, fsub>,
+ def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64Opnd, IIFmulDouble, fsub>,
MADDS_FM<7, 1>;
}
let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in {
- def MADD_D64 : MADDS_FT<"madd.d", FGR64RegsOpnd, IIFmulDouble, fadd>,
+ def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, IIFmulDouble, fadd>,
MADDS_FM<4, 1>;
- def MSUB_D64 : MADDS_FT<"msub.d", FGR64RegsOpnd, IIFmulDouble, fsub>,
+ def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, IIFmulDouble, fsub>,
MADDS_FM<5, 1>;
}
let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
isCodeGenOnly=1 in {
- def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64RegsOpnd, IIFmulDouble, fadd>,
+ def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, IIFmulDouble, fadd>,
MADDS_FM<6, 1>;
- def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64RegsOpnd, IIFmulDouble, fsub>,
+ def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, IIFmulDouble, fsub>,
MADDS_FM<7, 1>;
}
@@ -542,20 +512,27 @@ def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
-def BuildPairF64 :
- PseudoSE<(outs AFGR64RegsOpnd:$dst),
- (ins GPR32Opnd:$lo, GPR32Opnd:$hi),
- [(set AFGR64RegsOpnd:$dst,
- (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>;
+class BuildPairF64Base<RegisterOperand RO> :
+ PseudoSE<(outs RO:$dst), (ins GPR32Opnd:$lo, GPR32Opnd:$hi),
+ [(set RO:$dst, (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>;
+
+def BuildPairF64 : BuildPairF64Base<AFGR64Opnd>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+def BuildPairF64_64 : BuildPairF64Base<FGR64Opnd>,
+ Requires<[IsFP64bit, HasStdEnc]>;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
// allocation.
// if n is 0, lower part of src is extracted.
// if n is 1, higher part of src is extracted.
-def ExtractElementF64 :
- PseudoSE<(outs GPR32Opnd:$dst), (ins AFGR64RegsOpnd:$src, i32imm:$n),
- [(set GPR32Opnd:$dst,
- (MipsExtractElementF64 AFGR64RegsOpnd:$src, imm:$n))]>;
+class ExtractElementF64Base<RegisterOperand RO> :
+ PseudoSE<(outs GPR32Opnd:$dst), (ins RO:$src, i32imm:$n),
+ [(set GPR32Opnd:$dst, (MipsExtractElementF64 RO:$src, imm:$n))]>;
+
+def ExtractElementF64 : ExtractElementF64Base<AFGR64Opnd>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>,
+ Requires<[IsFP64bit, HasStdEnc]>;
//===----------------------------------------------------------------------===//
// InstAliases.
@@ -571,18 +548,18 @@ def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
def : MipsPat<(f32 (sint_to_fp GPR32Opnd:$src)),
(PseudoCVT_S_W GPR32Opnd:$src)>;
-def : MipsPat<(MipsTruncIntFP FGR32RegsOpnd:$src),
- (TRUNC_W_S FGR32RegsOpnd:$src)>;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_W_S FGR32Opnd:$src)>;
let Predicates = [NotFP64bit, HasStdEnc] in {
def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
(PseudoCVT_D32_W GPR32Opnd:$src)>;
- def : MipsPat<(MipsTruncIntFP AFGR64RegsOpnd:$src),
- (TRUNC_W_D32 AFGR64RegsOpnd:$src)>;
- def : MipsPat<(f32 (fround AFGR64RegsOpnd:$src)),
- (CVT_S_D32 AFGR64RegsOpnd:$src)>;
- def : MipsPat<(f64 (fextend FGR32RegsOpnd:$src)),
- (CVT_D32_S FGR32RegsOpnd:$src)>;
+ def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
+ (TRUNC_W_D32 AFGR64Opnd:$src)>;
+ def : MipsPat<(f32 (fround AFGR64Opnd:$src)),
+ (CVT_S_D32 AFGR64Opnd:$src)>;
+ def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
+ (CVT_D32_S FGR32Opnd:$src)>;
}
let Predicates = [IsFP64bit, HasStdEnc] in {
@@ -592,44 +569,37 @@ let Predicates = [IsFP64bit, HasStdEnc] in {
def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
(PseudoCVT_D64_W GPR32Opnd:$src)>;
def : MipsPat<(f32 (sint_to_fp GPR64Opnd:$src)),
- (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_32)>;
+ (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_lo)>;
def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)),
(PseudoCVT_D64_L GPR64Opnd:$src)>;
- def : MipsPat<(MipsTruncIntFP FGR64RegsOpnd:$src),
- (TRUNC_W_D64 FGR64RegsOpnd:$src)>;
- def : MipsPat<(MipsTruncIntFP FGR32RegsOpnd:$src),
- (TRUNC_L_S FGR32RegsOpnd:$src)>;
- def : MipsPat<(MipsTruncIntFP FGR64RegsOpnd:$src),
- (TRUNC_L_D64 FGR64RegsOpnd:$src)>;
+ def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+ (TRUNC_W_D64 FGR64Opnd:$src)>;
+ def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_L_S FGR32Opnd:$src)>;
+ def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+ (TRUNC_L_D64 FGR64Opnd:$src)>;
- def : MipsPat<(f32 (fround FGR64RegsOpnd:$src)),
- (CVT_S_D64 FGR64RegsOpnd:$src)>;
- def : MipsPat<(f64 (fextend FGR32RegsOpnd:$src)),
- (CVT_D64_S FGR32RegsOpnd:$src)>;
+ def : MipsPat<(f32 (fround FGR64Opnd:$src)),
+ (CVT_S_D64 FGR64Opnd:$src)>;
+ def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
+ (CVT_D64_S FGR32Opnd:$src)>;
}
// Patterns for loads/stores with a reg+imm operand.
let AddedComplexity = 40 in {
- let Predicates = [IsN64, HasStdEnc] in {
- def : LoadRegImmPat<LWC1_P8, f32, load>;
- def : StoreRegImmPat<SWC1_P8, f32>;
- def : LoadRegImmPat<LDC164_P8, f64, load>;
- def : StoreRegImmPat<SDC164_P8, f64>;
- }
-
- let Predicates = [NotN64, HasStdEnc] in {
+ let Predicates = [HasStdEnc] in {
def : LoadRegImmPat<LWC1, f32, load>;
def : StoreRegImmPat<SWC1, f32>;
}
- let Predicates = [NotN64, HasMips64, HasStdEnc] in {
+ let Predicates = [IsFP64bit, HasStdEnc] in {
def : LoadRegImmPat<LDC164, f64, load>;
def : StoreRegImmPat<SDC164, f64>;
}
- let Predicates = [NotN64, NotMips64, HasStdEnc] in {
- def : LoadRegImmPat<PseudoLDC1, f64, load>;
- def : StoreRegImmPat<PseudoSDC1, f64>;
+ let Predicates = [NotFP64bit, HasStdEnc] in {
+ def : LoadRegImmPat<LDC1, f64, load>;
+ def : StoreRegImmPat<SDC1, f64>;
}
}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 1322784..737a018 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -183,7 +183,7 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
// Format J instruction class in Mips : <|opcode|address|>
//===----------------------------------------------------------------------===//
-class FJ<bits<6> op>
+class FJ<bits<6> op> : StdArch
{
bits<26> target;
@@ -272,7 +272,7 @@ class SRLV_FM<bits<6> funct, bit rotate> : StdArch {
let Inst{5-0} = funct;
}
-class BEQ_FM<bits<6> op> {
+class BEQ_FM<bits<6> op> : StdArch {
bits<5> rs;
bits<5> rt;
bits<16> offset;
@@ -285,7 +285,7 @@ class BEQ_FM<bits<6> op> {
let Inst{15-0} = offset;
}
-class BGEZ_FM<bits<6> op, bits<5> funct> {
+class BGEZ_FM<bits<6> op, bits<5> funct> : StdArch {
bits<5> rs;
bits<16> offset;
@@ -297,17 +297,6 @@ class BGEZ_FM<bits<6> op, bits<5> funct> {
let Inst{15-0} = offset;
}
-class B_FM {
- bits<16> offset;
-
- bits<32> Inst;
-
- let Inst{31-26} = 4;
- let Inst{25-21} = 0;
- let Inst{20-16} = 0;
- let Inst{15-0} = offset;
-}
-
class SLTI_FM<bits<6> op> : StdArch {
bits<5> rt;
bits<5> rs;
@@ -321,7 +310,7 @@ class SLTI_FM<bits<6> op> : StdArch {
let Inst{15-0} = imm16;
}
-class MFLO_FM<bits<6> funct> {
+class MFLO_FM<bits<6> funct> : StdArch {
bits<5> rd;
bits<32> Inst;
@@ -333,7 +322,7 @@ class MFLO_FM<bits<6> funct> {
let Inst{5-0} = funct;
}
-class MTLO_FM<bits<6> funct> {
+class MTLO_FM<bits<6> funct> : StdArch {
bits<5> rs;
bits<32> Inst;
@@ -344,7 +333,7 @@ class MTLO_FM<bits<6> funct> {
let Inst{5-0} = funct;
}
-class SEB_FM<bits<5> funct, bits<6> funct2> {
+class SEB_FM<bits<5> funct, bits<6> funct2> : StdArch {
bits<5> rd;
bits<5> rt;
@@ -358,7 +347,7 @@ class SEB_FM<bits<5> funct, bits<6> funct2> {
let Inst{5-0} = funct2;
}
-class CLO_FM<bits<6> funct> {
+class CLO_FM<bits<6> funct> : StdArch {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -374,7 +363,7 @@ class CLO_FM<bits<6> funct> {
let rt = rd;
}
-class LUI_FM {
+class LUI_FM : StdArch {
bits<5> rt;
bits<16> imm16;
@@ -386,7 +375,7 @@ class LUI_FM {
let Inst{15-0} = imm16;
}
-class JALR_FM {
+class JALR_FM : StdArch {
bits<5> rd;
bits<5> rs;
@@ -400,7 +389,7 @@ class JALR_FM {
let Inst{5-0} = 9;
}
-class BGEZAL_FM<bits<5> funct> {
+class BGEZAL_FM<bits<5> funct> : StdArch {
bits<5> rs;
bits<16> offset;
@@ -435,7 +424,7 @@ class MULT_FM<bits<6> op, bits<6> funct> : StdArch {
let Inst{5-0} = funct;
}
-class EXT_FM<bits<6> funct> {
+class EXT_FM<bits<6> funct> : StdArch {
bits<5> rt;
bits<5> rs;
bits<5> pos;
@@ -465,7 +454,7 @@ class RDHWR_FM {
let Inst{5-0} = 0x3b;
}
-class TEQ_FM<bits<6> funct> {
+class TEQ_FM<bits<6> funct> : StdArch {
bits<5> rs;
bits<5> rt;
bits<10> code_;
@@ -479,6 +468,17 @@ class TEQ_FM<bits<6> funct> {
let Inst{5-0} = funct;
}
+class TEQI_FM<bits<5> funct> : StdArch {
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = imm16;
+}
//===----------------------------------------------------------------------===//
// System calls format <op|code_|funct>
//===----------------------------------------------------------------------===//
@@ -520,6 +520,24 @@ class ER_FM<bits<6> funct>
let Inst{5-0} = funct;
}
+
+//===----------------------------------------------------------------------===//
+// Enable/disable interrupt instruction format <Cop0|MFMC0|rt|12|0|sc|0|0>
+//===----------------------------------------------------------------------===//
+
+class EI_FM<bits<1> sc>
+{
+ bits<32> Inst;
+ bits<5> rt;
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = 0xb;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = 0xc;
+ let Inst{10-6} = 0;
+ let Inst{5} = sc;
+ let Inst{4-0} = 0;
+}
+
//===----------------------------------------------------------------------===//
//
// FLOATING POINT INSTRUCTION FORMATS
@@ -701,7 +719,7 @@ class CMov_I_F_FM<bits<6> funct, bits<5> fmt> {
let Inst{5-0} = funct;
}
-class CMov_F_I_FM<bit tf> {
+class CMov_F_I_FM<bit tf> : StdArch {
bits<5> rd;
bits<5> rs;
bits<3> fcc;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index eae05a3..0ebad05 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -22,11 +22,14 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "MipsGenInstrInfo.inc"
using namespace llvm;
+// Pin the vtable to this file.
+void MipsInstrInfo::anchor() {}
+
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
TM(tm), UncondBrOpc(UncondBr) {}
@@ -219,7 +222,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// If there is only one terminator instruction, process it.
if (!SecondLastOpc) {
- // Unconditional branch
+ // Unconditional branch.
if (LastOpc == UncondBrOpc) {
TBB = LastInst->getOperand(0).getMBB();
return BT_Uncond;
@@ -271,6 +274,10 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
+ case Mips::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
}
}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index b6480ef..d9ac961 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -27,6 +27,7 @@
namespace llvm {
class MipsInstrInfo : public MipsGenInstrInfo {
+ virtual void anchor();
protected:
MipsTargetMachine &TM;
unsigned UncondBrOpc;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index b9e8895..ebdbaa4 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -23,11 +23,9 @@ def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisInt<4>]>;
def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>,
- SDTCisVT<2, i32>]>;
-def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
- SDTCisVT<1, i32>,
- SDTCisSameAs<1, 2>]>;
+def SDT_MFLOHI : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVT<1, untyped>]>;
+def SDT_MTLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisInt<1>, SDTCisSameAs<1, 2>]>;
def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;
def SDT_MipsMAddMSub : SDTypeProfile<1, 3,
@@ -86,11 +84,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
[SDNPHasChain, SDNPSideEffect,
SDNPOptInGlue, SDNPOutGlue]>;
-// Node used to extract integer from LO/HI register.
-def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>;
+// Nodes used to extract LO/HI registers.
+def MipsMFHI : SDNode<"MipsISD::MFHI", SDT_MFLOHI>;
+def MipsMFLO : SDNode<"MipsISD::MFLO", SDT_MFLOHI>;
// Node used to insert 32-bit integers to LOHI register pair.
-def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>;
+def MipsMTLOHI : SDNode<"MipsISD::MTLOHI", SDT_MTLOHI>;
// Mult nodes.
def MipsMult : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>;
@@ -115,7 +114,7 @@ def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16,
// Wrapper node patterns give the instruction selector a chance to replace
// target constant nodes that would otherwise remain unchanged with ADDiu
// nodes. Without these wrapper node patterns, the following conditional move
-// instrucion is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
+// instruction is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
// compiled:
// movn %got(d)($gp), %got(c)($gp), $4
// This instruction is illegal since movn can take only register operands.
@@ -182,6 +181,12 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">,
def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
def NotDSP : Predicate<"!Subtarget.hasDSP()">;
+def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">,
+ AssemblerPredicate<"FeatureMicroMips">;
+def NotInMicroMips : Predicate<"!Subtarget.inMicroMipsMode()">,
+ AssemblerPredicate<"!FeatureMicroMips">;
+def IsLE : Predicate<"Subtarget.isLittle()">;
+def IsBE : Predicate<"!Subtarget.isLittle()">;
class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [HasStdEnc];
@@ -242,7 +247,7 @@ def brtarget : Operand<OtherVT> {
def calltarget : Operand<iPTR> {
let EncoderMethod = "getJumpTargetOpValue";
}
-def calltarget64: Operand<i64>;
+
def simm16 : Operand<i32> {
let DecoderMethod= "DecodeSimm16";
}
@@ -256,48 +261,67 @@ def uimm20 : Operand<i32> {
def uimm10 : Operand<i32> {
}
-def simm16_64 : Operand<i64>;
-def shamt : Operand<i32>;
+def simm16_64 : Operand<i64> {
+ let DecoderMethod = "DecodeSimm16";
+}
// Unsigned Operand
+def uimm5 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+def uimm6 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
def uimm16 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
}
+def pcrel16 : Operand<i32> {
+}
+
def MipsMemAsmOperand : AsmOperandClass {
let Name = "Mem";
let ParserMethod = "parseMemOperand";
}
-// Address operand
-def mem : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops GPR32, simm16);
- let EncoderMethod = "getMemEncoding";
- let ParserMatchClass = MipsMemAsmOperand;
- let OperandType = "OPERAND_MEMORY";
+def MipsInvertedImmoperand : AsmOperandClass {
+ let Name = "InvNum";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseInvNum";
+}
+
+def PtrRegAsmOperand : AsmOperandClass {
+ let Name = "PtrReg";
+ let ParserMethod = "parsePtrReg";
+}
+
+
+def InvertedImOperand : Operand<i32> {
+ let ParserMatchClass = MipsInvertedImmoperand;
}
-def mem64 : Operand<i64> {
+// Address operand
+def mem : Operand<iPTR> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops GPR64, simm16_64);
+ let MIOperandInfo = (ops ptr_rc, simm16);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
let OperandType = "OPERAND_MEMORY";
}
-def mem_ea : Operand<i32> {
+def mem_ea : Operand<iPTR> {
let PrintMethod = "printMemOperandEA";
- let MIOperandInfo = (ops GPR32, simm16);
+ let MIOperandInfo = (ops ptr_rc, simm16);
let EncoderMethod = "getMemEncoding";
let OperandType = "OPERAND_MEMORY";
}
-def mem_ea_64 : Operand<i64> {
- let PrintMethod = "printMemOperandEA";
- let MIOperandInfo = (ops GPR64, simm16_64);
- let EncoderMethod = "getMemEncoding";
- let OperandType = "OPERAND_MEMORY";
+def PtrRC : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc);
+ let DecoderMethod = "DecodePtrRegisterClass";
+ let ParserMatchClass = PtrRegAsmOperand;
}
// size operand of ext instruction
@@ -370,6 +394,9 @@ def addr :
def addrRegImm :
ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>;
+def addrRegReg :
+ ComplexPattern<iPTR, 2, "selectAddrRegReg", [frameindex]>;
+
def addrDefault :
ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
@@ -404,9 +431,9 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
// Arithmetic Multiply ADD/SUB
class MArithR<string opstr, bit isComm = 0> :
InstSE<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
- !strconcat(opstr, "\t$rs, $rt"), [], IIImult, FrmR> {
- let Defs = [HI, LO];
- let Uses = [HI, LO];
+ !strconcat(opstr, "\t$rs, $rt"), [], IIImult, FrmR, opstr> {
+ let Defs = [HI0, LO0];
+ let Uses = [HI0, LO0];
let isCommutable = isComm;
}
@@ -435,121 +462,66 @@ class shift_rotate_reg<string opstr, RegisterOperand RO,
// Load Upper Imediate
class LoadUpper<string opstr, RegisterOperand RO, Operand Imm>:
InstSE<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
- [], IIArith, FrmI>, IsAsCheapAsAMove {
+ [], IIArith, FrmI, opstr>, IsAsCheapAsAMove {
let neverHasSideEffects = 1;
let isReMaterializable = 1;
}
-class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: FFI<op, outs, ins, asmstr, pattern> {
- bits<21> addr;
- let Inst{25-21} = addr{20-16};
- let Inst{15-0} = addr{15-0};
- let DecoderMethod = "DecodeMem";
-}
-
// Memory Load/Store
-class Load<string opstr, SDPatternOperator OpNode, DAGOperand RO,
- InstrItinClass Itin, Operand MemOpnd, ComplexPattern Addr,
- string ofsuffix> :
- InstSE<(outs RO:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(set RO:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI,
- !strconcat(opstr, ofsuffix)> {
+class Load<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ InstSE<(outs RO:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RO:$rt, (OpNode Addr:$addr))], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMem";
let canFoldAsLoad = 1;
let mayLoad = 1;
}
-class Store<string opstr, SDPatternOperator OpNode, DAGOperand RO,
- InstrItinClass Itin, Operand MemOpnd, ComplexPattern Addr,
- string ofsuffix> :
- InstSE<(outs), (ins RO:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RO:$rt, Addr:$addr)], NoItinerary, FrmI,
- !strconcat(opstr, ofsuffix)> {
+class Store<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
}
-multiclass LoadM<string opstr, DAGOperand RO,
- SDPatternOperator OpNode = null_frag,
- InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> {
- def NAME : Load<opstr, OpNode, RO, Itin, mem, Addr, "">,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : Load<opstr, OpNode, RO, Itin, mem64, Addr, "_p8">,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
-multiclass StoreM<string opstr, DAGOperand RO,
- SDPatternOperator OpNode = null_frag,
- InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> {
- def NAME : Store<opstr, OpNode, RO, Itin, mem, Addr, "">,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : Store<opstr, OpNode, RO, Itin, mem64, Addr, "_p8">,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
// Load/Store Left/Right
let canFoldAsLoad = 1 in
class LoadLeftRight<string opstr, SDNode OpNode, RegisterOperand RO,
- Operand MemOpnd> :
- InstSE<(outs RO:$rt), (ins MemOpnd:$addr, RO:$src),
+ InstrItinClass Itin> :
+ InstSE<(outs RO:$rt), (ins mem:$addr, RO:$src),
!strconcat(opstr, "\t$rt, $addr"),
- [(set RO:$rt, (OpNode addr:$addr, RO:$src))], NoItinerary, FrmI> {
+ [(set RO:$rt, (OpNode addr:$addr, RO:$src))], Itin, FrmI> {
let DecoderMethod = "DecodeMem";
string Constraints = "$src = $rt";
}
class StoreLeftRight<string opstr, SDNode OpNode, RegisterOperand RO,
- Operand MemOpnd>:
- InstSE<(outs), (ins RO:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RO:$rt, addr:$addr)], NoItinerary, FrmI> {
+ InstrItinClass Itin> :
+ InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RO:$rt, addr:$addr)], Itin, FrmI> {
let DecoderMethod = "DecodeMem";
}
-multiclass LoadLeftRightM<string opstr, SDNode OpNode, RegisterOperand RO> {
- def NAME : LoadLeftRight<opstr, OpNode, RO, mem>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : LoadLeftRight<opstr, OpNode, RO, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
-multiclass StoreLeftRightM<string opstr, SDNode OpNode, RegisterOperand RO> {
- def NAME : StoreLeftRight<opstr, OpNode, RO, mem>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : StoreLeftRight<opstr, OpNode, RO, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
// Conditional Branch
-class CBranch<string opstr, PatFrag cond_op, RegisterOperand RO> :
- InstSE<(outs), (ins RO:$rs, RO:$rt, brtarget:$offset),
+class CBranch<string opstr, DAGOperand opnd, PatFrag cond_op,
+ RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset),
!strconcat(opstr, "\t$rs, $rt, $offset"),
[(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], IIBranch,
- FrmI> {
+ FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
let Defs = [AT];
}
-class CBranchZero<string opstr, PatFrag cond_op, RegisterOperand RO> :
- InstSE<(outs), (ins RO:$rs, brtarget:$offset),
+class CBranchZero<string opstr, DAGOperand opnd, PatFrag cond_op,
+ RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, opnd:$offset),
!strconcat(opstr, "\t$rs, $offset"),
- [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch, FrmI> {
+ [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch,
+ FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -572,9 +544,9 @@ class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
// Jump
class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
- SDPatternOperator targetoperator> :
+ SDPatternOperator targetoperator, string bopstr> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [(operator targetoperator:$target)], IIBranch, FrmJ> {
+ [(operator targetoperator:$target)], IIBranch, FrmJ, bopstr> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -583,9 +555,9 @@ class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
}
// Unconditional branch
-class UncondBranch<string opstr> :
- InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
- [(br bb:$offset)], IIBranch, FrmI> {
+class UncondBranch<Instruction BEQInst> :
+ PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], IIBranch>,
+ PseudoInstExpansion<(BEQInst ZERO, ZERO, brtarget:$offset)> {
let isBranch = 1;
let isTerminator = 1;
let isBarrier = 1;
@@ -596,17 +568,20 @@ class UncondBranch<string opstr> :
// Base class for indirect branch and return instruction classes.
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
-class JumpFR<RegisterOperand RO, SDPatternOperator operator = null_frag>:
- InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], IIBranch, FrmR>;
+class JumpFR<string opstr, RegisterOperand RO,
+ SDPatternOperator operator = null_frag>:
+ InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], IIBranch,
+ FrmR, opstr>;
// Indirect branch
-class IndirectBranch<RegisterOperand RO>: JumpFR<RO, brind> {
+class IndirectBranch<string opstr, RegisterOperand RO> :
+ JumpFR<opstr, RO, brind> {
let isBranch = 1;
let isIndirectBranch = 1;
}
// Return instruction
-class RetBase<RegisterOperand RO>: JumpFR<RO> {
+class RetBase<string opstr, RegisterOperand RO>: JumpFR<opstr, RO> {
let isReturn = 1;
let isCodeGenOnly = 1;
let hasCtrlDep = 1;
@@ -615,9 +590,9 @@ class RetBase<RegisterOperand RO>: JumpFR<RO> {
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
- class JumpLink<string opstr> :
- InstSE<(outs), (ins calltarget:$target), !strconcat(opstr, "\t$target"),
- [(MipsJmpLink imm:$target)], IIBranch, FrmJ> {
+ class JumpLink<string opstr, DAGOperand opnd> :
+ InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
+ [(MipsJmpLink imm:$target)], IIBranch, FrmJ, opstr> {
let DecoderMethod = "DecodeJumpTarget";
}
@@ -628,11 +603,11 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLinkReg<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [], IIBranch, FrmR>;
+ [], IIBranch, FrmR, opstr>;
- class BGEZAL_FT<string opstr, RegisterOperand RO> :
- InstSE<(outs), (ins RO:$rs, brtarget:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>;
+ class BGEZAL_FT<string opstr, DAGOperand opnd, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, opnd:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>;
}
@@ -660,6 +635,20 @@ class ER_FT<string opstr> :
InstSE<(outs), (ins),
opstr, [], NoItinerary, FrmOther>;
+// Interrupts
+class DEI_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt), (ins),
+ !strconcat(opstr, "\t$rt"), [], NoItinerary, FrmOther>;
+
+// Wait
+class WAIT_FT<string opstr> :
+ InstSE<(outs), (ins), opstr, [], NoItinerary, FrmOther> {
+ let Inst{31-26} = 0x10;
+ let Inst{25} = 1;
+ let Inst{24-6} = 0;
+ let Inst{5-0} = 0x20;
+}
+
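The WAIT_FT class just added hard-codes every field of the instruction word: major opcode 0x10 in bits 31-26, bit 25 set, a zero implementation-defined code field, and function value 0x20. A minimal C++ sketch (illustrative only, not LLVM's MC emitter) combines those fields the same way and checks the resulting word:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Inst = 0;
      Inst |= 0x10u << 26;  // Inst{31-26} = 0x10
      Inst |= 1u << 25;     // Inst{25}    = 1
                            // Inst{24-6}  = 0 (left untouched)
      Inst |= 0x20u;        // Inst{5-0}   = 0x20
      assert(Inst == 0x42000020u);  // the assembled `wait` word
      return 0;
    }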
// Sync
let hasSideEffects = 1 in
class SYNC_FT :
@@ -669,8 +658,12 @@ class SYNC_FT :
let hasSideEffects = 1 in
class TEQ_FT<string opstr, RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, RO:$rt, uimm16:$code_),
- !strconcat(opstr, "\t$rs, $rt, $code_"), [], NoItinerary, FrmI>;
+ !strconcat(opstr, "\t$rs, $rt, $code_"), [], NoItinerary,
+ FrmI, opstr>;
+class TEQI_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, uimm16:$imm16),
+ !strconcat(opstr, "\t$rs, $imm16"), [], NoItinerary, FrmOther, opstr>;
// Mul, Div
class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
list<Register> DefRegs> :
@@ -698,10 +691,10 @@ class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1,
// Pseudo multiply add/sub instruction with explicit accumulator register
// operands.
class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
- : PseudoSE<(outs ACRegs:$ac),
- (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACRegs:$acin),
- [(set ACRegs:$ac,
- (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACRegs:$acin))],
+ : PseudoSE<(outs ACC64:$ac),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64:$acin),
+ [(set ACC64:$ac,
+ (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64:$acin))],
IIImult>,
PseudoInstExpansion<(RealInst GPR32Opnd:$rs, GPR32Opnd:$rt)> {
string Constraints = "$acin = $ac";
@@ -710,25 +703,35 @@ class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
class Div<string opstr, InstrItinClass itin, RegisterOperand RO,
list<Register> DefRegs> :
InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"),
- [], itin, FrmR> {
+ [], itin, FrmR, opstr> {
let Defs = DefRegs;
}
// Move from Hi/Lo
-class MoveFromLOHI<string opstr, RegisterOperand RO, list<Register> UseRegs>:
- InstSE<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR> {
- let Uses = UseRegs;
+class PseudoMFLOHI<RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode>
+ : PseudoSE<(outs DstRC:$rd), (ins SrcRC:$hilo),
+ [(set DstRC:$rd, (OpNode SrcRC:$hilo))], IIHiLo>;
+
+class MoveFromLOHI<string opstr, RegisterOperand RO, Register UseReg>:
+ InstSE<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR,
+ opstr> {
+ let Uses = [UseReg];
let neverHasSideEffects = 1;
}
+class PseudoMTLOHI<RegisterClass DstRC, RegisterClass SrcRC>
+ : PseudoSE<(outs DstRC:$lohi), (ins SrcRC:$lo, SrcRC:$hi),
+ [(set DstRC:$lohi, (MipsMTLOHI SrcRC:$lo, SrcRC:$hi))], IIHiLo>;
+
class MoveToLOHI<string opstr, RegisterOperand RO, list<Register> DefRegs>:
- InstSE<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, FrmR> {
+ InstSE<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo,
+ FrmR, opstr> {
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class EffectiveAddress<string opstr, RegisterOperand RO, Operand Mem> :
- InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+class EffectiveAddress<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt), (ins mem_ea:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(set RO:$rt, addr:$addr)], NoItinerary, FrmI> {
let isCodeGenOnly = 1;
let DecoderMethod = "DecodeMem";
@@ -737,26 +740,26 @@ class EffectiveAddress<string opstr, RegisterOperand RO, Operand Mem> :
// Count Leading Ones/Zeros in Word
class CountLeading0<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [(set RO:$rd, (ctlz RO:$rs))], IIArith, FrmR>,
+ [(set RO:$rd, (ctlz RO:$rs))], IIArith, FrmR, opstr>,
Requires<[HasBitCount, HasStdEnc]>;
class CountLeading1<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [(set RO:$rd, (ctlz (not RO:$rs)))], IIArith, FrmR>,
+ [(set RO:$rd, (ctlz (not RO:$rs)))], IIArith, FrmR, opstr>,
Requires<[HasBitCount, HasStdEnc]>;
// Sign Extend in Register.
class SignExtInReg<string opstr, ValueType vt, RegisterOperand RO> :
InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"),
- [(set RO:$rd, (sext_inreg RO:$rt, vt))], IIseb, FrmR> {
+ [(set RO:$rd, (sext_inreg RO:$rt, vt))], IIseb, FrmR, opstr> {
let Predicates = [HasSEInReg, HasStdEnc];
}
// Subword Swap
class SubwordSwap<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
- NoItinerary, FrmR> {
+ NoItinerary, FrmR, opstr> {
let Predicates = [HasSwap, HasStdEnc];
let neverHasSideEffects = 1;
}
@@ -767,66 +770,60 @@ class ReadHardware<RegisterOperand CPURegOperand, RegisterOperand RO> :
IIArith, FrmR>;
// Ext and Ins
-class ExtBase<string opstr, RegisterOperand RO>:
- InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size),
+class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
+ SDPatternOperator Op = null_frag>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary,
- FrmR> {
+ [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ FrmR, opstr> {
let Predicates = [HasMips32r2, HasStdEnc];
}
-class InsBase<string opstr, RegisterOperand RO>:
- InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src),
+class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
+ SDPatternOperator Op = null_frag>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))],
- NoItinerary, FrmR> {
+ [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))],
+ NoItinerary, FrmR, opstr> {
let Predicates = [HasMips32r2, HasStdEnc];
let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
-class Atomic2Ops<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
- PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
-
-multiclass Atomic2Ops32<PatFrag Op> {
- def NAME : Atomic2Ops<Op, GPR32, GPR32>, Requires<[NotN64, HasStdEnc]>;
- def _P8 : Atomic2Ops<Op, GPR32, GPR64>, Requires<[IsN64, HasStdEnc]>;
-}
+class Atomic2Ops<PatFrag Op, RegisterClass DRC> :
+ PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr),
+ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>;
// Atomic Compare & Swap.
-class AtomicCmpSwap<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
- PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
-
-multiclass AtomicCmpSwap32<PatFrag Op> {
- def NAME : AtomicCmpSwap<Op, GPR32, GPR32>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : AtomicCmpSwap<Op, GPR32, GPR64>,
- Requires<[IsN64, HasStdEnc]>;
-}
+class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
+ PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
+ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
-class LLBase<string opstr, RegisterOperand RO, Operand Mem> :
- InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+class LLBase<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
[], NoItinerary, FrmI> {
let DecoderMethod = "DecodeMem";
let mayLoad = 1;
}
-class SCBase<string opstr, RegisterOperand RO, Operand Mem> :
- InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr),
+class SCBase<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, mem:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
let Constraints = "$rt = $dst";
}
-class MFC3OP<dag outs, dag ins, string asmstr> :
- InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>;
+class MFC3OP<string asmstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt, RO:$rd, uimm16:$sel), (ins),
+ !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>;
-let isBarrier = 1, isTerminator = 1, isCodeGenOnly = 1 in
-def TRAP : InstSE<(outs), (ins), "break", [(trap)], NoItinerary, FrmOther> {
- let Inst = 0x0000000d;
+class TrapBase<Instruction RealInst>
+ : PseudoSE<(outs), (ins), [(trap)], NoItinerary>,
+ PseudoInstExpansion<(RealInst 0, 0)> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isCodeGenOnly = 1;
}
//===----------------------------------------------------------------------===//
@@ -845,38 +842,38 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
}
let usesCustomInserter = 1 in {
- defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8>;
- defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16>;
- defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32>;
- defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8>;
- defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16>;
- defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32>;
- defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8>;
- defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16>;
- defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32>;
- defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8>;
- defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16>;
- defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32>;
- defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8>;
- defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16>;
- defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32>;
- defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8>;
- defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16>;
- defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32>;
-
- defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8>;
- defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16>;
- defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32>;
-
- defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8>;
- defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16>;
- defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>;
+ def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, GPR32>;
+ def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, GPR32>;
+ def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, GPR32>;
+ def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, GPR32>;
+ def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, GPR32>;
+ def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, GPR32>;
+ def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, GPR32>;
+ def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, GPR32>;
+ def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, GPR32>;
+ def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, GPR32>;
+ def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, GPR32>;
+ def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, GPR32>;
+ def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, GPR32>;
+ def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, GPR32>;
+ def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, GPR32>;
+ def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, GPR32>;
+ def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, GPR32>;
+ def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, GPR32>;
+
+ def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, GPR32>;
+ def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, GPR32>;
+ def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, GPR32>;
+
+ def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
+ def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
+ def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
}
/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1 in {
- defm LOAD_AC64 : LoadM<"", ACRegs>;
- defm STORE_AC64 : StoreM<"", ACRegs>;
+ def LOAD_ACC64 : Load<"", ACC64>;
+ def STORE_ACC64 : Store<"", ACC64>;
}
//===----------------------------------------------------------------------===//
@@ -911,6 +908,7 @@ def ADDu : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, IIArith, add>,
ADD_FM<0, 0x21>;
def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, IIArith, sub>,
ADD_FM<0, 0x23>;
+let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, IIImul, mul>,
ADD_FM<0x1c, 2>;
def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>;
@@ -926,11 +924,11 @@ def XOR : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, IILogic, xor>,
def NOR : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def SLL : MMRel, shift_rotate_imm<"sll", shamt, GPR32Opnd, shl, immZExt5>,
+def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, shl, immZExt5>,
SRA_FM<0, 0>;
-def SRL : MMRel, shift_rotate_imm<"srl", shamt, GPR32Opnd, srl, immZExt5>,
+def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, srl, immZExt5>,
SRA_FM<2, 0>;
-def SRA : MMRel, shift_rotate_imm<"sra", shamt, GPR32Opnd, sra, immZExt5>,
+def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, sra, immZExt5>,
SRA_FM<3, 0>;
def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, shl>, SRLV_FM<4, 0>;
def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, srl>, SRLV_FM<6, 0>;
@@ -938,7 +936,7 @@ def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, sra>, SRLV_FM<7, 0>;
// Rotate Instructions
let Predicates = [HasMips32r2, HasStdEnc] in {
- def ROTR : MMRel, shift_rotate_imm<"rotr", shamt, GPR32Opnd, rotr,
+ def ROTR : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd, rotr,
immZExt5>,
SRA_FM<2, 1>;
def ROTRV : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd, rotr>,
@@ -947,65 +945,85 @@ let Predicates = [HasMips32r2, HasStdEnc] in {
/// Load and Store Instructions
/// aligned
-defm LB : LoadM<"lb", GPR32Opnd, sextloadi8, IILoad>, MMRel, LW_FM<0x20>;
-defm LBu : LoadM<"lbu", GPR32Opnd, zextloadi8, IILoad, addrDefault>, MMRel,
- LW_FM<0x24>;
-defm LH : LoadM<"lh", GPR32Opnd, sextloadi16, IILoad, addrDefault>, MMRel,
- LW_FM<0x21>;
-defm LHu : LoadM<"lhu", GPR32Opnd, zextloadi16, IILoad>, MMRel, LW_FM<0x25>;
-defm LW : LoadM<"lw", GPR32Opnd, load, IILoad, addrDefault>, MMRel, LW_FM<0x23>;
-defm SB : StoreM<"sb", GPR32Opnd, truncstorei8, IIStore>, MMRel, LW_FM<0x28>;
-defm SH : StoreM<"sh", GPR32Opnd, truncstorei16, IIStore>, MMRel, LW_FM<0x29>;
-defm SW : StoreM<"sw", GPR32Opnd, store, IIStore>, MMRel, LW_FM<0x2b>;
+def LB : Load<"lb", GPR32Opnd, sextloadi8, IILoad>, MMRel, LW_FM<0x20>;
+def LBu : Load<"lbu", GPR32Opnd, zextloadi8, IILoad, addrDefault>, MMRel,
+ LW_FM<0x24>;
+def LH : Load<"lh", GPR32Opnd, sextloadi16, IILoad, addrDefault>, MMRel,
+ LW_FM<0x21>;
+def LHu : Load<"lhu", GPR32Opnd, zextloadi16, IILoad>, MMRel, LW_FM<0x25>;
+def LW : Load<"lw", GPR32Opnd, load, IILoad, addrDefault>, MMRel,
+ LW_FM<0x23>;
+def SB : Store<"sb", GPR32Opnd, truncstorei8, IIStore>, MMRel, LW_FM<0x28>;
+def SH : Store<"sh", GPR32Opnd, truncstorei16, IIStore>, MMRel, LW_FM<0x29>;
+def SW : Store<"sw", GPR32Opnd, store, IIStore>, MMRel, LW_FM<0x2b>;
/// load/store left/right
-defm LWL : LoadLeftRightM<"lwl", MipsLWL, GPR32Opnd>, LW_FM<0x22>;
-defm LWR : LoadLeftRightM<"lwr", MipsLWR, GPR32Opnd>, LW_FM<0x26>;
-defm SWL : StoreLeftRightM<"swl", MipsSWL, GPR32Opnd>, LW_FM<0x2a>;
-defm SWR : StoreLeftRightM<"swr", MipsSWR, GPR32Opnd>, LW_FM<0x2e>;
+let Predicates = [NotInMicroMips] in {
+def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, IILoad>, LW_FM<0x22>;
+def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, IILoad>, LW_FM<0x26>;
+def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, IIStore>, LW_FM<0x2a>;
+def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, IIStore>, LW_FM<0x2e>;
+}
def SYNC : SYNC_FT, SYNC_FM;
-def TEQ : TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>;
+def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>;
+def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>;
+def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>;
+def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>;
+def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>;
+def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>;
+
+def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>;
+def TGEI : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>;
+def TGEIU : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>;
+def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>;
+def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>;
+def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>;
def BREAK : BRK_FT<"break">, BRK_FM<0xd>;
def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>;
+def TRAP : TrapBase<BREAK>;
def ERET : ER_FT<"eret">, ER_FM<0x18>;
def DERET : ER_FT<"deret">, ER_FM<0x1f>;
-/// Load-linked, Store-conditional
-let Predicates = [NotN64, HasStdEnc] in {
- def LL : LLBase<"ll", GPR32Opnd, mem>, LW_FM<0x30>;
- def SC : SCBase<"sc", GPR32Opnd, mem>, LW_FM<0x38>;
-}
+def EI : DEI_FT<"ei", GPR32Opnd>, EI_FM<1>;
+def DI : DEI_FT<"di", GPR32Opnd>, EI_FM<0>;
-let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
- def LL_P8 : LLBase<"ll", GPR32Opnd, mem64>, LW_FM<0x30>;
- def SC_P8 : SCBase<"sc", GPR32Opnd, mem64>, LW_FM<0x38>;
-}
+def WAIT : WAIT_FT<"wait">;
+
+/// Load-linked, Store-conditional
+def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>;
+def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>;
/// Jump and Branch Instructions
-def J : JumpFJ<jmptarget, "j", br, bb>, FJ<2>,
+def J : MMRel, JumpFJ<jmptarget, "j", br, bb, "j">, FJ<2>,
Requires<[RelocStatic, HasStdEnc]>, IsBranch;
-def JR : IndirectBranch<GPR32Opnd>, MTLO_FM<8>;
-def B : UncondBranch<"b">, B_FM;
-def BEQ : CBranch<"beq", seteq, GPR32Opnd>, BEQ_FM<4>;
-def BNE : CBranch<"bne", setne, GPR32Opnd>, BEQ_FM<5>;
-def BGEZ : CBranchZero<"bgez", setge, GPR32Opnd>, BGEZ_FM<1, 1>;
-def BGTZ : CBranchZero<"bgtz", setgt, GPR32Opnd>, BGEZ_FM<7, 0>;
-def BLEZ : CBranchZero<"blez", setle, GPR32Opnd>, BGEZ_FM<6, 0>;
-def BLTZ : CBranchZero<"bltz", setlt, GPR32Opnd>, BGEZ_FM<1, 0>;
-
-def JAL : JumpLink<"jal">, FJ<3>;
-def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
+def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>;
+def BEQ : MMRel, CBranch<"beq", brtarget, seteq, GPR32Opnd>, BEQ_FM<4>;
+def BNE : MMRel, CBranch<"bne", brtarget, setne, GPR32Opnd>, BEQ_FM<5>;
+def BGEZ : MMRel, CBranchZero<"bgez", brtarget, setge, GPR32Opnd>,
+ BGEZ_FM<1, 1>;
+def BGTZ : MMRel, CBranchZero<"bgtz", brtarget, setgt, GPR32Opnd>,
+ BGEZ_FM<7, 0>;
+def BLEZ : MMRel, CBranchZero<"blez", brtarget, setle, GPR32Opnd>,
+ BGEZ_FM<6, 0>;
+def BLTZ : MMRel, CBranchZero<"bltz", brtarget, setlt, GPR32Opnd>,
+ BGEZ_FM<1, 0>;
+def B : UncondBranch<BEQ>;
+
+def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
+def JALR : MMRel, JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
-def BGEZAL : BGEZAL_FT<"bgezal", GPR32Opnd>, BGEZAL_FM<0x11>;
-def BLTZAL : BGEZAL_FT<"bltzal", GPR32Opnd>, BGEZAL_FM<0x10>;
+def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>;
+def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>;
def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
-def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall;
-def TAILCALL_R : JumpFR<GPR32Opnd, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+def TAILCALL : MMRel, JumpFJ<calltarget, "j", MipsTailCall, imm, "tcall">,
+ FJ<2>, IsTailCall;
+def TAILCALL_R : MMRel, JumpFR<"tcallr", GPR32Opnd, MipsTailCall>, MTLO_FM<8>,
+ IsTailCall;
-def RET : RetBase<GPR32Opnd>, MTLO_FM<8>;
+def RET : MMRel, RetBase<"ret", GPR32Opnd>, MTLO_FM<8>;
// Exception handling related node and instructions.
// The conversion sequence is:
@@ -1029,34 +1047,30 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
/// Multiply and Divide Instructions.
-def MULT : MMRel, Mult<"mult", IIImult, GPR32Opnd, [HI, LO]>,
+def MULT : MMRel, Mult<"mult", IIImult, GPR32Opnd, [HI0, LO0]>,
MULT_FM<0, 0x18>;
-def MULTu : MMRel, Mult<"multu", IIImult, GPR32Opnd, [HI, LO]>,
+def MULTu : MMRel, Mult<"multu", IIImult, GPR32Opnd, [HI0, LO0]>,
MULT_FM<0, 0x19>;
-def PseudoMULT : MultDivPseudo<MULT, ACRegs, GPR32Opnd, MipsMult, IIImult>;
-def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, GPR32Opnd, MipsMultu, IIImult>;
-def SDIV : Div<"div", IIIdiv, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x1a>;
-def UDIV : Div<"divu", IIIdiv, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x1b>;
-def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, GPR32Opnd, MipsDivRem, IIIdiv,
- 0, 1, 1>;
-def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, GPR32Opnd, MipsDivRemU, IIIdiv,
- 0, 1, 1>;
+def SDIV : MMRel, Div<"div", IIIdiv, GPR32Opnd, [HI0, LO0]>,
+ MULT_FM<0, 0x1a>;
+def UDIV : MMRel, Div<"divu", IIIdiv, GPR32Opnd, [HI0, LO0]>,
+ MULT_FM<0, 0x1b>;
-def MTHI : MoveToLOHI<"mthi", GPR32Opnd, [HI]>, MTLO_FM<0x11>;
-def MTLO : MoveToLOHI<"mtlo", GPR32Opnd, [LO]>, MTLO_FM<0x13>;
-def MFHI : MoveFromLOHI<"mfhi", GPR32Opnd, [HI]>, MFLO_FM<0x10>;
-def MFLO : MoveFromLOHI<"mflo", GPR32Opnd, [LO]>, MFLO_FM<0x12>;
+def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>;
+def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>;
+def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>;
+def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>;
/// Sign Ext In Register Instructions.
-def SEB : SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM<0x10, 0x20>;
-def SEH : SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM<0x18, 0x20>;
+def SEB : MMRel, SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM<0x10, 0x20>;
+def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def CLZ : CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>;
-def CLO : CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>;
+def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>;
+def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>;
/// Word Swap Bytes Within Halfwords
-def WSBH : SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>;
+def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>;
/// No operation.
def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
@@ -1065,39 +1079,41 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
// instructions. The same not happens for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu", GPR32Opnd, mem_ea>, LW_FM<9>;
+def LEA_ADDiu : EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>;
// MADD*/MSUB*
-def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
-def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
-def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>;
-def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>;
+def MADD : MMRel, MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
+def MADDU : MMRel, MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
+def MSUB : MMRel, MArithR<"msub">, MULT_FM<0x1c, 4>;
+def MSUBU : MMRel, MArithR<"msubu">, MULT_FM<0x1c, 5>;
+
+let Predicates = [HasStdEnc, NotDSP] in {
+def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, IIImult>;
+def PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, IIImult>;
+def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>;
+def PseudoMFLO : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>;
+def PseudoMTLOHI : PseudoMTLOHI<ACC64, GPR32>;
def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>;
def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>;
def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>;
def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>;
+}
+
+def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, IIIdiv,
+ 0, 1, 1>;
+def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, IIIdiv,
+ 0, 1, 1>;
def RDHWR : ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
-def EXT : ExtBase<"ext", GPR32Opnd>, EXT_FM<0>;
-def INS : InsBase<"ins", GPR32Opnd>, EXT_FM<4>;
+def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
+def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
/// Move Control Registers From/To CPU Registers
-def MFC0_3OP : MFC3OP<(outs GPR32Opnd:$rt),
- (ins GPR32Opnd:$rd, uimm16:$sel),
- "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
-
-def MTC0_3OP : MFC3OP<(outs GPR32Opnd:$rd, uimm16:$sel),
- (ins GPR32Opnd:$rt),
- "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
-
-def MFC2_3OP : MFC3OP<(outs GPR32Opnd:$rt),
- (ins GPR32Opnd:$rd, uimm16:$sel),
- "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
-
-def MTC2_3OP : MFC3OP<(outs GPR32Opnd:$rd, uimm16:$sel),
- (ins GPR32Opnd:$rt),
- "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
+def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>;
+def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>;
+def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>;
+def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>;
//===----------------------------------------------------------------------===//
// Instruction aliases
@@ -1129,14 +1145,11 @@ def : InstAlias<"xor $rs, $rt, $imm",
def : InstAlias<"or $rs, $rt, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
-def : InstAlias<"mfc0 $rt, $rd",
- (MFC0_3OP GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : InstAlias<"mtc0 $rt, $rd",
- (MTC0_3OP GPR32Opnd:$rd, 0, GPR32Opnd:$rt), 0>;
-def : InstAlias<"mfc2 $rt, $rd",
- (MFC2_3OP GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : InstAlias<"mtc2 $rt, $rd",
- (MTC2_3OP GPR32Opnd:$rd, 0, GPR32Opnd:$rt), 0>;
+def : InstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : InstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : InstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : InstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : InstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
def : InstAlias<"bnez $rs,$offset",
(BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : InstAlias<"beqz $rs,$offset",
@@ -1145,6 +1158,20 @@ def : InstAlias<"syscall", (SYSCALL 0), 1>;
def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
def : InstAlias<"break", (BREAK 0, 0), 1>;
+def : InstAlias<"ei", (EI ZERO), 1>;
+def : InstAlias<"di", (DI ZERO), 1>;
+
+def : InstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"sub, $rd, $rs, $imm",
+ (ADDi GPR32Opnd:$rd, GPR32Opnd:$rs, InvertedImOperand:$imm)>;
+def : InstAlias<"subu, $rd, $rs, $imm",
+ (ADDiu GPR32Opnd:$rd, GPR32Opnd:$rs, InvertedImOperand:$imm)>;
+
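The sub/subu aliases above route an immediate form through the new InvertedImOperand, whose parser method is parseInvNum. The parser itself is outside this diff, so the exact behaviour is an assumption here, but the natural reading is that the immediate is negated while parsing, so subtracting a constant is emitted as adding its negation. A hedged C++ sketch of that idea (the helper name is hypothetical, not an LLVM API):

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Illustrative only: `subu $rd, $rs, imm` rendered as `addiu $rd, $rs, -imm`.
    static std::string lowerSubuImm(const std::string &Rd, const std::string &Rs,
                                    int32_t Imm) {
      return "addiu " + Rd + ", " + Rs + ", " + std::to_string(-Imm);
    }

    int main() {
      assert(lowerSubuImm("$4", "$5", 8) == "addiu $4, $5, -8");
      return 0;
    }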
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1152,7 +1179,7 @@ def : InstAlias<"break", (BREAK 0, 0), 1>;
class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", shamt,GPR32Opnd>;
+def LoadImm32Reg : LoadImm32<"li", uimm5, GPR32Opnd>;
class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
@@ -1162,9 +1189,7 @@ def LoadAddr32Reg : LoadAddress<"la", mem, GPR32Opnd>;
class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", shamt,GPR32Opnd>;
-
-
+def LoadAddr32Imm : LoadAddressImm<"la", uimm5, GPR32Opnd>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -1261,24 +1286,15 @@ def : MipsPat<(not GPR32:$in),
(NOR GPR32Opnd:$in, ZERO)>;
// extended loads
-let Predicates = [NotN64, HasStdEnc] in {
+let Predicates = [HasStdEnc] in {
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
}
-let Predicates = [IsN64, HasStdEnc] in {
- def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
- def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
- def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu_P8 addr:$src)>;
-}
// peepholes
-let Predicates = [NotN64, HasStdEnc] in {
- def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
-}
-let Predicates = [IsN64, HasStdEnc] in {
- def : MipsPat<(store (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
-}
+let Predicates = [HasStdEnc] in
+def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
// brcond patterns
multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
@@ -1369,22 +1385,13 @@ defm : SetgeImmPats<GPR32, SLTi, SLTiu>;
// bswap pattern
def : MipsPat<(bswap GPR32:$rt), (ROTR (WSBH GPR32:$rt), 16)>;
-// mflo/hi patterns.
-def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)),
- (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>;
-
// Load halfword/word patterns.
let AddedComplexity = 40 in {
- let Predicates = [NotN64, HasStdEnc] in {
+ let Predicates = [HasStdEnc] in {
def : LoadRegImmPat<LBu, i32, zextloadi8>;
def : LoadRegImmPat<LH, i32, sextloadi16>;
def : LoadRegImmPat<LW, i32, load>;
}
- let Predicates = [IsN64, HasStdEnc] in {
- def : LoadRegImmPat<LBu_P8, i32, zextloadi8>;
- def : LoadRegImmPat<LH_P8, i32, sextloadi16>;
- def : LoadRegImmPat<LW_P8, i32, load>;
- }
}
//===----------------------------------------------------------------------===//
@@ -1405,6 +1412,10 @@ include "Mips16InstrInfo.td"
include "MipsDSPInstrFormats.td"
include "MipsDSPInstrInfo.td"
+// MSA
+include "MipsMSAInstrFormats.td"
+include "MipsMSAInstrInfo.td"
+
// Micromips
include "MicroMipsInstrFormats.td"
include "MicroMipsInstrInfo.td"
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 971176e..2efe578 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -237,6 +237,11 @@ void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
MIB.addMBB(MBBOpnd);
+ // Bundle the instruction in the delay slot to the newly created branch
+ // and erase the original branch.
+ assert(Br->isBundledWithSucc());
+ MachineBasicBlock::instr_iterator II(Br);
+ MIBundleBuilder(&*MIB).append((++II)->removeFromBundle());
Br->eraseFromParent();
}
@@ -432,8 +437,10 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
if (!I->Br || I->HasLongBranch)
continue;
+ int ShVal = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode() ? 2 : 4;
+
// Check if offset fits into 16-bit immediate field of branches.
- if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4))
+ if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / ShVal))
continue;
I->HasLongBranch = true;
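The change above replaces the fixed divisor of 4 with ShVal, which is 2 when the subtarget is in microMIPS mode. The branch immediate counts instruction-sized units rather than bytes, so the byte offset returned by computeOffset is scaled by the unit size before the signed 16-bit range test; with 2-byte microMIPS units, only half the byte range fits. A self-contained C++ sketch of that check (isInt<16> reimplemented locally; only the logic shown in the hunk is assumed):

    #include <cassert>
    #include <cstdint>

    static bool fitsInSImm16(int64_t V) { return V >= -32768 && V <= 32767; }

    // Needs expansion to a long branch when the scaled offset overflows 16 bits.
    static bool needsLongBranch(int64_t ByteOffset, bool InMicroMips) {
      int ShVal = InMicroMips ? 2 : 4;
      return !fitsInSImm16(ByteOffset / ShVal);
    }

    int main() {
      assert(!needsLongBranch(32767 * 4, false)); // largest reachable standard offset
      assert(needsLongBranch(32768 * 4, false));  // one unit further must be expanded
      assert(needsLongBranch(32767 * 4, true));   // same byte offset overflows with 2-byte units
      return 0;
    }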
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index d836975..b6dfadc 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -28,8 +28,7 @@ using namespace llvm;
MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter)
: AsmPrinter(asmprinter) {}
-void MipsMCInstLower::Initialize(Mangler *M, MCContext *C) {
- Mang = M;
+void MipsMCInstLower::Initialize(MCContext *C) {
Ctx = C;
}
@@ -74,7 +73,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
break;
case MachineOperand::MO_GlobalAddress:
- Symbol = Mang->getSymbol(MO.getGlobal());
+ Symbol = AsmPrinter.getSymbol(MO.getGlobal());
Offset += MO.getOffset();
break;
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index c4a6016..4570bd9 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -19,7 +19,6 @@ namespace llvm {
class MCOperand;
class MachineInstr;
class MachineFunction;
- class Mangler;
class MipsAsmPrinter;
/// MipsMCInstLower - This class is used to lower an MachineInstr into an
@@ -27,11 +26,10 @@ namespace llvm {
class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
typedef MachineOperand::MachineOperandType MachineOperandType;
MCContext *Ctx;
- Mangler *Mang;
MipsAsmPrinter &AsmPrinter;
public:
MipsMCInstLower(MipsAsmPrinter &asmprinter);
- void Initialize(Mangler *mang, MCContext *C);
+ void Initialize(MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
new file mode 100644
index 0000000..875dc0b
--- /dev/null
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -0,0 +1,406 @@
+//===- MipsMSAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def HasMSA : Predicate<"Subtarget.hasMSA()">,
+ AssemblerPredicate<"FeatureMSA">;
+
+class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
+ let Predicates = [HasMSA];
+ let Inst{31-26} = 0b011110;
+}
+
+class MSACBranch : MSAInst {
+ let Inst{31-26} = 0b010001;
+}
+
+class MSASpecial : MSAInst {
+ let Inst{31-26} = 0b000000;
+}
+
+class PseudoMSA<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
+ let Predicates = [HasMSA];
+}
+
+class MSA_BIT_B_FMT<bits<3> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+ bits<3> m;
+
+ let Inst{25-23} = major;
+ let Inst{22-19} = 0b1110;
+ let Inst{18-16} = m;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_BIT_H_FMT<bits<3> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+ bits<4> m;
+
+ let Inst{25-23} = major;
+ let Inst{22-20} = 0b110;
+ let Inst{19-16} = m;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_BIT_W_FMT<bits<3> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+ bits<5> m;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = 0b10;
+ let Inst{20-16} = m;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_BIT_D_FMT<bits<3> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+ bits<6> m;
+
+ let Inst{25-23} = major;
+ let Inst{22} = 0b0;
+ let Inst{21-16} = m;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_2R_FILL_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> rs;
+ bits<5> wd;
+
+ let Inst{25-18} = major;
+ let Inst{17-16} = df;
+ let Inst{15-11} = rs;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_2R_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-18} = major;
+ let Inst{17-16} = df;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_2RF_FMT<bits<9> major, bits<1> df, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-17} = major;
+ let Inst{16} = df;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_3R_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> wt;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-16} = wt;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_3RF_FMT<bits<4> major, bits<1> df, bits<6> minor>: MSAInst {
+ bits<5> wt;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21} = df;
+ let Inst{20-16} = wt;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_3R_INDEX_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> rt;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_FMT<bits<10> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-16} = major;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_CFCMSA_FMT<bits<10> major, bits<6> minor>: MSAInst {
+ bits<5> rd;
+ bits<5> cs;
+
+ let Inst{25-16} = major;
+ let Inst{15-11} = cs;
+ let Inst{10-6} = rd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_CTCMSA_FMT<bits<10> major, bits<6> minor>: MSAInst {
+ bits<5> rs;
+ bits<5> cd;
+
+ let Inst{25-16} = major;
+ let Inst{15-11} = rs;
+ let Inst{10-6} = cd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_B_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = n{3-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_H_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = n{2-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-18} = 0b1100;
+ let Inst{17-16} = n{1-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_D_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-17} = 0b11100;
+ let Inst{16} = n{0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_COPY_B_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> rd;
+
+ let Inst{25-22} = major;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = n{3-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = rd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_COPY_H_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> rd;
+
+ let Inst{25-22} = major;
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = n{2-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = rd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_COPY_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> rd;
+
+ let Inst{25-22} = major;
+ let Inst{21-18} = 0b1100;
+ let Inst{17-16} = n{1-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = rd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_INSERT_B_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<6> n;
+ bits<5> rs;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = n{3-0};
+ let Inst{15-11} = rs;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_INSERT_H_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<6> n;
+ bits<5> rs;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = n{2-0};
+ let Inst{15-11} = rs;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_INSERT_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<6> n;
+ bits<5> rs;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-18} = 0b1100;
+ let Inst{17-16} = n{1-0};
+ let Inst{15-11} = rs;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_I5_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> imm;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-16} = imm;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_I8_FMT<bits<2> major, bits<6> minor>: MSAInst {
+ bits<8> u8;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-24} = major;
+ let Inst{23-16} = u8;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_I10_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<10> s10;
+ bits<5> wd;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-11} = s10;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_MI10_FMT<bits<2> df, bits<4> minor>: MSAInst {
+ bits<21> addr;
+ bits<5> wd;
+
+ let Inst{25-16} = addr{9-0};
+ let Inst{15-11} = addr{20-16};
+ let Inst{10-6} = wd;
+ let Inst{5-2} = minor;
+ let Inst{1-0} = df;
+}
+
+class MSA_VEC_FMT<bits<5> major, bits<6> minor>: MSAInst {
+ bits<5> wt;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-21} = major;
+ let Inst{20-16} = wt;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_CBRANCH_FMT<bits<3> major, bits<2> df>: MSACBranch {
+ bits<16> offset;
+ bits<5> wt;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-16} = wt;
+ let Inst{15-0} = offset;
+}
+
+class MSA_CBRANCH_V_FMT<bits<5> major>: MSACBranch {
+ bits<16> offset;
+ bits<5> wt;
+
+ let Inst{25-21} = major;
+ let Inst{20-16} = wt;
+ let Inst{15-0} = offset;
+}
+
+class SPECIAL_LSA_FMT<bits<6> minor>: MSASpecial {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> rd;
+ bits<2> sa;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-8} = 0b000;
+ let Inst{7-6} = sa;
+ let Inst{5-0} = minor;
+}
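Most of the formats in this new file place each operand in one contiguous field, but MSA_MI10_FMT splits its 21-bit addr operand: addr{9-0} goes to Inst{25-16} and addr{20-16} to Inst{15-11}. Following the convention of the existing mem encodings earlier in this patch, where addr{20-16} carries the base register and the low bits carry the offset, a short C++ sketch (illustrative only, not LLVM's encoder) packs such an operand:

    #include <cassert>
    #include <cstdint>

    // Pack an MSA_MI10_FMT word from its fields; the register/offset roles are an
    // assumption based on the other mem operand formats in this file set.
    static uint32_t encodeMI10(unsigned BaseReg, unsigned Offset10, unsigned Wd,
                               unsigned Minor, unsigned Df) {
      uint32_t Addr = ((BaseReg & 0x1F) << 16) | (Offset10 & 0x3FF); // bits<21> addr
      uint32_t Inst = 0x1Eu << 26;                // MSAInst: Inst{31-26} = 0b011110
      Inst |= (Addr & 0x3FF) << 16;               // Inst{25-16} = addr{9-0}
      Inst |= ((Addr >> 16) & 0x1F) << 11;        // Inst{15-11} = addr{20-16}
      Inst |= (Wd & 0x1F) << 6;                   // Inst{10-6}  = wd
      Inst |= (Minor & 0xF) << 2;                 // Inst{5-2}   = minor
      Inst |= Df & 0x3;                           // Inst{1-0}   = df
      return Inst;
    }

    int main() {
      uint32_t I = encodeMI10(/*BaseReg=*/5, /*Offset10=*/0, /*Wd=*/3,
                              /*Minor=*/0x8, /*Df=*/0);
      assert(((I >> 26) & 0x3F) == 0x1E); // MSA major opcode preserved
      assert(((I >> 11) & 0x1F) == 5);    // base register landed in Inst{15-11}
      return 0;
    }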
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
new file mode 100644
index 0000000..82c51a6
--- /dev/null
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -0,0 +1,3694 @@
+//===- MipsMSAInstrInfo.td - MSA ASE instructions -*- tablegen ------------*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips MSA ASE instructions.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
+ SDTCisInt<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, OtherVT>]>;
+def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
+ SDTCisFP<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, OtherVT>]>;
+def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisInt<1>, SDTCisVec<1>,
+ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
+def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>;
+def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
+
+def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>;
+def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>;
+def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>;
+def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>;
+def MipsVSMax : SDNode<"MipsISD::VSMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVSMin : SDNode<"MipsISD::VSMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVUMax : SDNode<"MipsISD::VUMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>;
+def MipsSHF : SDNode<"MipsISD::SHF", SDT_SHF>;
+def MipsILVEV : SDNode<"MipsISD::ILVEV", SDT_ILV>;
+def MipsILVOD : SDNode<"MipsISD::ILVOD", SDT_ILV>;
+def MipsILVL : SDNode<"MipsISD::ILVL", SDT_ILV>;
+def MipsILVR : SDNode<"MipsISD::ILVR", SDT_ILV>;
+def MipsPCKEV : SDNode<"MipsISD::PCKEV", SDT_ILV>;
+def MipsPCKOD : SDNode<"MipsISD::PCKOD", SDT_ILV>;
+
+def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>;
+def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>;
+
+def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT",
+ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT",
+ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+
+// Operands
+
+def uimm2 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// The immediate of an LSA instruction needs special handling
+// as the encoded value should be subtracted by one.
+def uimm2LSAAsmOperand : AsmOperandClass {
+ let Name = "LSAImm";
+ let ParserMethod = "parseLSAImm";
+ let RenderMethod = "addImmOperands";
+}
+
+def LSAImm : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+ let EncoderMethod = "getLSAImmEncoding";
+ let DecoderMethod = "DecodeLSAImm";
+ let ParserMatchClass = uimm2LSAAsmOperand;
+}
+
+def uimm3 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def uimm4 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def uimm8 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def simm5 : Operand<i32>;
+
+def simm10 : Operand<i32>;
+
+def vsplat_uimm1 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm2 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm3 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm4 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm5 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm6 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm8 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_simm5 : Operand<vAny>;
+
+def vsplat_simm10 : Operand<vAny>;
+
+def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>;
+
+// Pattern fragments
+def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractSExt node:$vec, node:$idx, i8)>;
+def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractSExt node:$vec, node:$idx, i16)>;
+def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractSExt node:$vec, node:$idx, i32)>;
+
+def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractZExt node:$vec, node:$idx, i8)>;
+def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractZExt node:$vec, node:$idx, i16)>;
+def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractZExt node:$vec, node:$idx, i32)>;
+
+def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+ (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>;
+def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+ (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>;
+def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+ (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>;
+
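+// Matches a floating-point vector setcc with a specific condition code; the
+// result is an integer vector with the same number of elements.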
+class vfsetcc_type<ValueType ResTy, ValueType OpTy, CondCode CC> :
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>;
+
+// ISD::SETFALSE cannot occur
+def vfsetoeq_v4f32 : vfsetcc_type<v4i32, v4f32, SETOEQ>;
+def vfsetoeq_v2f64 : vfsetcc_type<v2i64, v2f64, SETOEQ>;
+def vfsetoge_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGE>;
+def vfsetoge_v2f64 : vfsetcc_type<v2i64, v2f64, SETOGE>;
+def vfsetogt_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGT>;
+def vfsetogt_v2f64 : vfsetcc_type<v2i64, v2f64, SETOGT>;
+def vfsetole_v4f32 : vfsetcc_type<v4i32, v4f32, SETOLE>;
+def vfsetole_v2f64 : vfsetcc_type<v2i64, v2f64, SETOLE>;
+def vfsetolt_v4f32 : vfsetcc_type<v4i32, v4f32, SETOLT>;
+def vfsetolt_v2f64 : vfsetcc_type<v2i64, v2f64, SETOLT>;
+def vfsetone_v4f32 : vfsetcc_type<v4i32, v4f32, SETONE>;
+def vfsetone_v2f64 : vfsetcc_type<v2i64, v2f64, SETONE>;
+def vfsetord_v4f32 : vfsetcc_type<v4i32, v4f32, SETO>;
+def vfsetord_v2f64 : vfsetcc_type<v2i64, v2f64, SETO>;
+def vfsetun_v4f32 : vfsetcc_type<v4i32, v4f32, SETUO>;
+def vfsetun_v2f64 : vfsetcc_type<v2i64, v2f64, SETUO>;
+def vfsetueq_v4f32 : vfsetcc_type<v4i32, v4f32, SETUEQ>;
+def vfsetueq_v2f64 : vfsetcc_type<v2i64, v2f64, SETUEQ>;
+def vfsetuge_v4f32 : vfsetcc_type<v4i32, v4f32, SETUGE>;
+def vfsetuge_v2f64 : vfsetcc_type<v2i64, v2f64, SETUGE>;
+def vfsetugt_v4f32 : vfsetcc_type<v4i32, v4f32, SETUGT>;
+def vfsetugt_v2f64 : vfsetcc_type<v2i64, v2f64, SETUGT>;
+def vfsetule_v4f32 : vfsetcc_type<v4i32, v4f32, SETULE>;
+def vfsetule_v2f64 : vfsetcc_type<v2i64, v2f64, SETULE>;
+def vfsetult_v4f32 : vfsetcc_type<v4i32, v4f32, SETULT>;
+def vfsetult_v2f64 : vfsetcc_type<v2i64, v2f64, SETULT>;
+def vfsetune_v4f32 : vfsetcc_type<v4i32, v4f32, SETUNE>;
+def vfsetune_v2f64 : vfsetcc_type<v2i64, v2f64, SETUNE>;
+// ISD::SETTRUE cannot occur
+// ISD::SETFALSE2 cannot occur
+// ISD::SETTRUE2 cannot occur
+
+class vsetcc_type<ValueType ResTy, CondCode CC> :
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (ResTy (vsetcc node:$lhs, node:$rhs, CC))>;
+
+def vseteq_v16i8 : vsetcc_type<v16i8, SETEQ>;
+def vseteq_v8i16 : vsetcc_type<v8i16, SETEQ>;
+def vseteq_v4i32 : vsetcc_type<v4i32, SETEQ>;
+def vseteq_v2i64 : vsetcc_type<v2i64, SETEQ>;
+def vsetle_v16i8 : vsetcc_type<v16i8, SETLE>;
+def vsetle_v8i16 : vsetcc_type<v8i16, SETLE>;
+def vsetle_v4i32 : vsetcc_type<v4i32, SETLE>;
+def vsetle_v2i64 : vsetcc_type<v2i64, SETLE>;
+def vsetlt_v16i8 : vsetcc_type<v16i8, SETLT>;
+def vsetlt_v8i16 : vsetcc_type<v8i16, SETLT>;
+def vsetlt_v4i32 : vsetcc_type<v4i32, SETLT>;
+def vsetlt_v2i64 : vsetcc_type<v2i64, SETLT>;
+def vsetule_v16i8 : vsetcc_type<v16i8, SETULE>;
+def vsetule_v8i16 : vsetcc_type<v8i16, SETULE>;
+def vsetule_v4i32 : vsetcc_type<v4i32, SETULE>;
+def vsetule_v2i64 : vsetcc_type<v2i64, SETULE>;
+def vsetult_v16i8 : vsetcc_type<v16i8, SETULT>;
+def vsetult_v8i16 : vsetcc_type<v8i16, SETULT>;
+def vsetult_v4i32 : vsetcc_type<v4i32, SETULT>;
+def vsetult_v2i64 : vsetcc_type<v2i64, SETULT>;
+
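+// Fragments that match a build_vector whose elements are all copies of the
+// same operand, i.e. a splat.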
+def vsplati8 : PatFrag<(ops node:$e0),
+ (v16i8 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def vsplati16 : PatFrag<(ops node:$e0),
+ (v8i16 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def vsplati32 : PatFrag<(ops node:$e0),
+ (v4i32 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def vsplati64 : PatFrag<(ops node:$e0),
+                          (v2i64 (build_vector node:$e0, node:$e0))>;
+def vsplatf32 : PatFrag<(ops node:$e0),
+ (v4f32 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def vsplatf64 : PatFrag<(ops node:$e0),
+ (v2f64 (build_vector node:$e0, node:$e0))>;
+
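+// Fragments that splat element $i of vector $v into every lane by shuffling
+// $v with a control vector whose lanes are all $i (VSHF).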
+def vsplati8_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati8 node:$i), node:$v, node:$v)>;
+def vsplati16_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati16 node:$i), node:$v, node:$v)>;
+def vsplati32_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati32 node:$i), node:$v, node:$v)>;
+def vsplati64_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati64 node:$i), node:$v, node:$v)>;
+
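+// Helper classes that pair a splat pattern with the Operand class used to
+// print and encode its immediate; instruction descriptions refer to it as
+// SplatImm.OpClass.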
+class SplatPatLeaf<Operand opclass, dag frag, code pred = [{}],
+ SDNodeXForm xform = NOOP_SDNodeXForm>
+ : PatLeaf<frag, pred, xform> {
+ Operand OpClass = opclass;
+}
+
+class SplatComplexPattern<Operand opclass, ValueType ty, int numops, string fn,
+ list<SDNode> roots = [],
+ list<SDNodeProperty> props = []> :
+ ComplexPattern<ty, numops, fn, roots, props> {
+ Operand OpClass = opclass;
+}
+
+def vsplati8_uimm3 : SplatComplexPattern<vsplat_uimm3, v16i8, 1,
+ "selectVSplatUimm3",
+ [build_vector, bitconvert]>;
+
+def vsplati8_uimm4 : SplatComplexPattern<vsplat_uimm4, v16i8, 1,
+ "selectVSplatUimm4",
+ [build_vector, bitconvert]>;
+
+def vsplati8_uimm5 : SplatComplexPattern<vsplat_uimm5, v16i8, 1,
+ "selectVSplatUimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati8_uimm8 : SplatComplexPattern<vsplat_uimm8, v16i8, 1,
+ "selectVSplatUimm8",
+ [build_vector, bitconvert]>;
+
+def vsplati8_simm5 : SplatComplexPattern<vsplat_simm5, v16i8, 1,
+ "selectVSplatSimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati16_uimm3 : SplatComplexPattern<vsplat_uimm3, v8i16, 1,
+ "selectVSplatUimm3",
+ [build_vector, bitconvert]>;
+
+def vsplati16_uimm4 : SplatComplexPattern<vsplat_uimm4, v8i16, 1,
+ "selectVSplatUimm4",
+ [build_vector, bitconvert]>;
+
+def vsplati16_uimm5 : SplatComplexPattern<vsplat_uimm5, v8i16, 1,
+ "selectVSplatUimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati16_simm5 : SplatComplexPattern<vsplat_simm5, v8i16, 1,
+ "selectVSplatSimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati32_uimm2 : SplatComplexPattern<vsplat_uimm2, v4i32, 1,
+ "selectVSplatUimm2",
+ [build_vector, bitconvert]>;
+
+def vsplati32_uimm5 : SplatComplexPattern<vsplat_uimm5, v4i32, 1,
+ "selectVSplatUimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati32_simm5 : SplatComplexPattern<vsplat_simm5, v4i32, 1,
+ "selectVSplatSimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati64_uimm1 : SplatComplexPattern<vsplat_uimm1, v2i64, 1,
+ "selectVSplatUimm1",
+ [build_vector, bitconvert]>;
+
+def vsplati64_uimm5 : SplatComplexPattern<vsplat_uimm5, v2i64, 1,
+ "selectVSplatUimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati64_uimm6 : SplatComplexPattern<vsplat_uimm6, v2i64, 1,
+ "selectVSplatUimm6",
+ [build_vector, bitconvert]>;
+
+def vsplati64_simm5 : SplatComplexPattern<vsplat_simm5, v2i64, 1,
+ "selectVSplatSimm5",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with a value that is an exact
+// power of 2
+def vsplat_uimm_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmPow2",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with a value that is the bitwise
+// inverse of an exact power of 2
+def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with only a consecutive sequence
+// of left-most bits set.
+def vsplat_maskl_bits : SplatComplexPattern<vsplat_uimm8, vAny, 1,
+ "selectVSplatMaskL",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with only a consecutive sequence
+// of right-most bits set.
+def vsplat_maskr_bits : SplatComplexPattern<vsplat_uimm8, vAny, 1,
+ "selectVSplatMaskR",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with a value that equals 1
+// FIXME: These should be ComplexPatterns, but we can't use them because the
+// ISel generator requires their uses to have a name, and providing a name
+// causes other errors ("used in pattern but not operand list").
+def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{
+ APInt Imm;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  return selectVSplat(N, Imm) &&
+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
+
+def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
+ APInt Imm;
+ SDNode *BV = N->getOperand(0).getNode();
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  return selectVSplat(BV, Imm) &&
+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
+
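+// Clear bit $wt of each element of $ws: (and $ws, (not (shl 1, $wt))).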
+def vbclr_b : PatFrag<(ops node:$ws, node:$wt),
+ (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+ immAllOnesV))>;
+def vbclr_h : PatFrag<(ops node:$ws, node:$wt),
+ (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+ immAllOnesV))>;
+def vbclr_w : PatFrag<(ops node:$ws, node:$wt),
+ (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+ immAllOnesV))>;
+def vbclr_d : PatFrag<(ops node:$ws, node:$wt),
+ (and node:$ws, (xor (shl (v2i64 vsplati64_imm_eq_1),
+ node:$wt),
+ (bitconvert (v4i32 immAllOnesV))))>;
+
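+// Toggle bit $wt of each element of $ws: (xor $ws, (shl 1, $wt)).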
+def vbneg_b : PatFrag<(ops node:$ws, node:$wt),
+ (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbneg_h : PatFrag<(ops node:$ws, node:$wt),
+ (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbneg_w : PatFrag<(ops node:$ws, node:$wt),
+ (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbneg_d : PatFrag<(ops node:$ws, node:$wt),
+ (xor node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+ node:$wt))>;
+
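+// Set bit $wt of each element of $ws: (or $ws, (shl 1, $wt)).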
+def vbset_b : PatFrag<(ops node:$ws, node:$wt),
+ (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbset_h : PatFrag<(ops node:$ws, node:$wt),
+ (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbset_w : PatFrag<(ops node:$ws, node:$wt),
+ (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbset_d : PatFrag<(ops node:$ws, node:$wt),
+ (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+ node:$wt))>;
+
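+// Multiply-accumulate fragments: fms is $wd - $ws * $wt, muladd and mulsub
+// are $wd +/- $ws * $wt, and mul_fexp2 is $ws * 2^$wt.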
+def fms : PatFrag<(ops node:$wd, node:$ws, node:$wt),
+ (fsub node:$wd, (fmul node:$ws, node:$wt))>;
+
+def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt),
+ (add node:$wd, (mul node:$ws, node:$wt))>;
+
+def mulsub : PatFrag<(ops node:$wd, node:$ws, node:$wt),
+ (sub node:$wd, (mul node:$ws, node:$wt))>;
+
+def mul_fexp2 : PatFrag<(ops node:$ws, node:$wt),
+ (fmul node:$ws, (fexp2 node:$wt))>;
+
+// Immediates
+def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>;
+def immSExt10: ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
+
+// Instruction encoding.
+class ADD_A_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010000>;
+class ADD_A_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010000>;
+class ADD_A_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010000>;
+class ADD_A_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010000>;
+
+class ADDS_A_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010000>;
+class ADDS_A_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010000>;
+class ADDS_A_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010000>;
+class ADDS_A_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010000>;
+
+class ADDS_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010000>;
+class ADDS_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010000>;
+class ADDS_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010000>;
+class ADDS_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010000>;
+
+class ADDS_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010000>;
+class ADDS_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010000>;
+class ADDS_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010000>;
+class ADDS_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010000>;
+
+class ADDV_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001110>;
+class ADDV_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001110>;
+class ADDV_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001110>;
+class ADDV_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001110>;
+
+class ADDVI_B_ENC : MSA_I5_FMT<0b000, 0b00, 0b000110>;
+class ADDVI_H_ENC : MSA_I5_FMT<0b000, 0b01, 0b000110>;
+class ADDVI_W_ENC : MSA_I5_FMT<0b000, 0b10, 0b000110>;
+class ADDVI_D_ENC : MSA_I5_FMT<0b000, 0b11, 0b000110>;
+
+class AND_V_ENC : MSA_VEC_FMT<0b00000, 0b011110>;
+
+class ANDI_B_ENC : MSA_I8_FMT<0b00, 0b000000>;
+
+class ASUB_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010001>;
+class ASUB_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010001>;
+class ASUB_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010001>;
+class ASUB_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010001>;
+
+class ASUB_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010001>;
+class ASUB_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010001>;
+class ASUB_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010001>;
+class ASUB_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010001>;
+
+class AVE_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010000>;
+class AVE_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010000>;
+class AVE_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010000>;
+class AVE_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010000>;
+
+class AVE_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010000>;
+class AVE_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010000>;
+class AVE_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010000>;
+class AVE_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010000>;
+
+class AVER_S_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010000>;
+class AVER_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010000>;
+class AVER_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010000>;
+class AVER_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010000>;
+
+class AVER_U_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010000>;
+class AVER_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010000>;
+class AVER_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010000>;
+class AVER_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010000>;
+
+class BCLR_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001101>;
+class BCLR_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001101>;
+class BCLR_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001101>;
+class BCLR_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001101>;
+
+class BCLRI_B_ENC : MSA_BIT_B_FMT<0b011, 0b001001>;
+class BCLRI_H_ENC : MSA_BIT_H_FMT<0b011, 0b001001>;
+class BCLRI_W_ENC : MSA_BIT_W_FMT<0b011, 0b001001>;
+class BCLRI_D_ENC : MSA_BIT_D_FMT<0b011, 0b001001>;
+
+class BINSL_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b001101>;
+class BINSL_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b001101>;
+class BINSL_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b001101>;
+class BINSL_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b001101>;
+
+class BINSLI_B_ENC : MSA_BIT_B_FMT<0b110, 0b001001>;
+class BINSLI_H_ENC : MSA_BIT_H_FMT<0b110, 0b001001>;
+class BINSLI_W_ENC : MSA_BIT_W_FMT<0b110, 0b001001>;
+class BINSLI_D_ENC : MSA_BIT_D_FMT<0b110, 0b001001>;
+
+class BINSR_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b001101>;
+class BINSR_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b001101>;
+class BINSR_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b001101>;
+class BINSR_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b001101>;
+
+class BINSRI_B_ENC : MSA_BIT_B_FMT<0b111, 0b001001>;
+class BINSRI_H_ENC : MSA_BIT_H_FMT<0b111, 0b001001>;
+class BINSRI_W_ENC : MSA_BIT_W_FMT<0b111, 0b001001>;
+class BINSRI_D_ENC : MSA_BIT_D_FMT<0b111, 0b001001>;
+
+class BMNZ_V_ENC : MSA_VEC_FMT<0b00100, 0b011110>;
+
+class BMNZI_B_ENC : MSA_I8_FMT<0b00, 0b000001>;
+
+class BMZ_V_ENC : MSA_VEC_FMT<0b00101, 0b011110>;
+
+class BMZI_B_ENC : MSA_I8_FMT<0b01, 0b000001>;
+
+class BNEG_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001101>;
+class BNEG_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001101>;
+class BNEG_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001101>;
+class BNEG_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001101>;
+
+class BNEGI_B_ENC : MSA_BIT_B_FMT<0b101, 0b001001>;
+class BNEGI_H_ENC : MSA_BIT_H_FMT<0b101, 0b001001>;
+class BNEGI_W_ENC : MSA_BIT_W_FMT<0b101, 0b001001>;
+class BNEGI_D_ENC : MSA_BIT_D_FMT<0b101, 0b001001>;
+
+class BNZ_B_ENC : MSA_CBRANCH_FMT<0b111, 0b00>;
+class BNZ_H_ENC : MSA_CBRANCH_FMT<0b111, 0b01>;
+class BNZ_W_ENC : MSA_CBRANCH_FMT<0b111, 0b10>;
+class BNZ_D_ENC : MSA_CBRANCH_FMT<0b111, 0b11>;
+
+class BNZ_V_ENC : MSA_CBRANCH_V_FMT<0b01111>;
+
+class BSEL_V_ENC : MSA_VEC_FMT<0b00110, 0b011110>;
+
+class BSELI_B_ENC : MSA_I8_FMT<0b10, 0b000001>;
+
+class BSET_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001101>;
+class BSET_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001101>;
+class BSET_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001101>;
+class BSET_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001101>;
+
+class BSETI_B_ENC : MSA_BIT_B_FMT<0b100, 0b001001>;
+class BSETI_H_ENC : MSA_BIT_H_FMT<0b100, 0b001001>;
+class BSETI_W_ENC : MSA_BIT_W_FMT<0b100, 0b001001>;
+class BSETI_D_ENC : MSA_BIT_D_FMT<0b100, 0b001001>;
+
+class BZ_B_ENC : MSA_CBRANCH_FMT<0b110, 0b00>;
+class BZ_H_ENC : MSA_CBRANCH_FMT<0b110, 0b01>;
+class BZ_W_ENC : MSA_CBRANCH_FMT<0b110, 0b10>;
+class BZ_D_ENC : MSA_CBRANCH_FMT<0b110, 0b11>;
+
+class BZ_V_ENC : MSA_CBRANCH_V_FMT<0b01011>;
+
+class CEQ_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001111>;
+class CEQ_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001111>;
+class CEQ_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001111>;
+class CEQ_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001111>;
+
+class CEQI_B_ENC : MSA_I5_FMT<0b000, 0b00, 0b000111>;
+class CEQI_H_ENC : MSA_I5_FMT<0b000, 0b01, 0b000111>;
+class CEQI_W_ENC : MSA_I5_FMT<0b000, 0b10, 0b000111>;
+class CEQI_D_ENC : MSA_I5_FMT<0b000, 0b11, 0b000111>;
+
+class CFCMSA_ENC : MSA_ELM_CFCMSA_FMT<0b0001111110, 0b011001>;
+
+class CLE_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001111>;
+class CLE_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001111>;
+class CLE_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001111>;
+class CLE_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001111>;
+
+class CLE_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001111>;
+class CLE_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001111>;
+class CLE_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001111>;
+class CLE_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001111>;
+
+class CLEI_S_B_ENC : MSA_I5_FMT<0b100, 0b00, 0b000111>;
+class CLEI_S_H_ENC : MSA_I5_FMT<0b100, 0b01, 0b000111>;
+class CLEI_S_W_ENC : MSA_I5_FMT<0b100, 0b10, 0b000111>;
+class CLEI_S_D_ENC : MSA_I5_FMT<0b100, 0b11, 0b000111>;
+
+class CLEI_U_B_ENC : MSA_I5_FMT<0b101, 0b00, 0b000111>;
+class CLEI_U_H_ENC : MSA_I5_FMT<0b101, 0b01, 0b000111>;
+class CLEI_U_W_ENC : MSA_I5_FMT<0b101, 0b10, 0b000111>;
+class CLEI_U_D_ENC : MSA_I5_FMT<0b101, 0b11, 0b000111>;
+
+class CLT_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001111>;
+class CLT_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001111>;
+class CLT_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001111>;
+class CLT_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001111>;
+
+class CLT_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001111>;
+class CLT_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001111>;
+class CLT_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001111>;
+class CLT_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001111>;
+
+class CLTI_S_B_ENC : MSA_I5_FMT<0b010, 0b00, 0b000111>;
+class CLTI_S_H_ENC : MSA_I5_FMT<0b010, 0b01, 0b000111>;
+class CLTI_S_W_ENC : MSA_I5_FMT<0b010, 0b10, 0b000111>;
+class CLTI_S_D_ENC : MSA_I5_FMT<0b010, 0b11, 0b000111>;
+
+class CLTI_U_B_ENC : MSA_I5_FMT<0b011, 0b00, 0b000111>;
+class CLTI_U_H_ENC : MSA_I5_FMT<0b011, 0b01, 0b000111>;
+class CLTI_U_W_ENC : MSA_I5_FMT<0b011, 0b10, 0b000111>;
+class CLTI_U_D_ENC : MSA_I5_FMT<0b011, 0b11, 0b000111>;
+
+class COPY_S_B_ENC : MSA_ELM_COPY_B_FMT<0b0010, 0b011001>;
+class COPY_S_H_ENC : MSA_ELM_COPY_H_FMT<0b0010, 0b011001>;
+class COPY_S_W_ENC : MSA_ELM_COPY_W_FMT<0b0010, 0b011001>;
+
+class COPY_U_B_ENC : MSA_ELM_COPY_B_FMT<0b0011, 0b011001>;
+class COPY_U_H_ENC : MSA_ELM_COPY_H_FMT<0b0011, 0b011001>;
+class COPY_U_W_ENC : MSA_ELM_COPY_W_FMT<0b0011, 0b011001>;
+
+class CTCMSA_ENC : MSA_ELM_CTCMSA_FMT<0b0000111110, 0b011001>;
+
+class DIV_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010010>;
+class DIV_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010010>;
+class DIV_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010010>;
+class DIV_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010010>;
+
+class DIV_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010010>;
+class DIV_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010010>;
+class DIV_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010010>;
+class DIV_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010010>;
+
+class DOTP_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010011>;
+class DOTP_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010011>;
+class DOTP_S_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010011>;
+
+class DOTP_U_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010011>;
+class DOTP_U_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010011>;
+class DOTP_U_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010011>;
+
+class DPADD_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010011>;
+class DPADD_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010011>;
+class DPADD_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010011>;
+
+class DPADD_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010011>;
+class DPADD_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010011>;
+class DPADD_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010011>;
+
+class DPSUB_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010011>;
+class DPSUB_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010011>;
+class DPSUB_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010011>;
+
+class DPSUB_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010011>;
+class DPSUB_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010011>;
+class DPSUB_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010011>;
+
+class FADD_W_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011011>;
+class FADD_D_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011011>;
+
+class FCAF_W_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011010>;
+class FCAF_D_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011010>;
+
+class FCEQ_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011010>;
+class FCEQ_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011010>;
+
+class FCLASS_W_ENC : MSA_2RF_FMT<0b110010000, 0b0, 0b011110>;
+class FCLASS_D_ENC : MSA_2RF_FMT<0b110010000, 0b1, 0b011110>;
+
+class FCLE_W_ENC : MSA_3RF_FMT<0b0110, 0b0, 0b011010>;
+class FCLE_D_ENC : MSA_3RF_FMT<0b0110, 0b1, 0b011010>;
+
+class FCLT_W_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011010>;
+class FCLT_D_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011010>;
+
+class FCNE_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011100>;
+class FCNE_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011100>;
+
+class FCOR_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011100>;
+class FCOR_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011100>;
+
+class FCUEQ_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011010>;
+class FCUEQ_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011010>;
+
+class FCULE_W_ENC : MSA_3RF_FMT<0b0111, 0b0, 0b011010>;
+class FCULE_D_ENC : MSA_3RF_FMT<0b0111, 0b1, 0b011010>;
+
+class FCULT_W_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011010>;
+class FCULT_D_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011010>;
+
+class FCUN_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011010>;
+class FCUN_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011010>;
+
+class FCUNE_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011100>;
+class FCUNE_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011100>;
+
+class FDIV_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011011>;
+class FDIV_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011011>;
+
+class FEXDO_H_ENC : MSA_3RF_FMT<0b1000, 0b0, 0b011011>;
+class FEXDO_W_ENC : MSA_3RF_FMT<0b1000, 0b1, 0b011011>;
+
+class FEXP2_W_ENC : MSA_3RF_FMT<0b0111, 0b0, 0b011011>;
+class FEXP2_D_ENC : MSA_3RF_FMT<0b0111, 0b1, 0b011011>;
+
+class FEXUPL_W_ENC : MSA_2RF_FMT<0b110011000, 0b0, 0b011110>;
+class FEXUPL_D_ENC : MSA_2RF_FMT<0b110011000, 0b1, 0b011110>;
+
+class FEXUPR_W_ENC : MSA_2RF_FMT<0b110011001, 0b0, 0b011110>;
+class FEXUPR_D_ENC : MSA_2RF_FMT<0b110011001, 0b1, 0b011110>;
+
+class FFINT_S_W_ENC : MSA_2RF_FMT<0b110011110, 0b0, 0b011110>;
+class FFINT_S_D_ENC : MSA_2RF_FMT<0b110011110, 0b1, 0b011110>;
+
+class FFINT_U_W_ENC : MSA_2RF_FMT<0b110011111, 0b0, 0b011110>;
+class FFINT_U_D_ENC : MSA_2RF_FMT<0b110011111, 0b1, 0b011110>;
+
+class FFQL_W_ENC : MSA_2RF_FMT<0b110011010, 0b0, 0b011110>;
+class FFQL_D_ENC : MSA_2RF_FMT<0b110011010, 0b1, 0b011110>;
+
+class FFQR_W_ENC : MSA_2RF_FMT<0b110011011, 0b0, 0b011110>;
+class FFQR_D_ENC : MSA_2RF_FMT<0b110011011, 0b1, 0b011110>;
+
+class FILL_B_ENC : MSA_2R_FILL_FMT<0b11000000, 0b00, 0b011110>;
+class FILL_H_ENC : MSA_2R_FILL_FMT<0b11000000, 0b01, 0b011110>;
+class FILL_W_ENC : MSA_2R_FILL_FMT<0b11000000, 0b10, 0b011110>;
+
+class FLOG2_W_ENC : MSA_2RF_FMT<0b110010111, 0b0, 0b011110>;
+class FLOG2_D_ENC : MSA_2RF_FMT<0b110010111, 0b1, 0b011110>;
+
+class FMADD_W_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011011>;
+class FMADD_D_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011011>;
+
+class FMAX_W_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011011>;
+class FMAX_D_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011011>;
+
+class FMAX_A_W_ENC : MSA_3RF_FMT<0b1111, 0b0, 0b011011>;
+class FMAX_A_D_ENC : MSA_3RF_FMT<0b1111, 0b1, 0b011011>;
+
+class FMIN_W_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011011>;
+class FMIN_D_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011011>;
+
+class FMIN_A_W_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011011>;
+class FMIN_A_D_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011011>;
+
+class FMSUB_W_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011011>;
+class FMSUB_D_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011011>;
+
+class FMUL_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011011>;
+class FMUL_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011011>;
+
+class FRINT_W_ENC : MSA_2RF_FMT<0b110010110, 0b0, 0b011110>;
+class FRINT_D_ENC : MSA_2RF_FMT<0b110010110, 0b1, 0b011110>;
+
+class FRCP_W_ENC : MSA_2RF_FMT<0b110010101, 0b0, 0b011110>;
+class FRCP_D_ENC : MSA_2RF_FMT<0b110010101, 0b1, 0b011110>;
+
+class FRSQRT_W_ENC : MSA_2RF_FMT<0b110010100, 0b0, 0b011110>;
+class FRSQRT_D_ENC : MSA_2RF_FMT<0b110010100, 0b1, 0b011110>;
+
+class FSAF_W_ENC : MSA_3RF_FMT<0b1000, 0b0, 0b011010>;
+class FSAF_D_ENC : MSA_3RF_FMT<0b1000, 0b1, 0b011010>;
+
+class FSEQ_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011010>;
+class FSEQ_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011010>;
+
+class FSLE_W_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011010>;
+class FSLE_D_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011010>;
+
+class FSLT_W_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011010>;
+class FSLT_D_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011010>;
+
+class FSNE_W_ENC : MSA_3RF_FMT<0b1011, 0b0, 0b011100>;
+class FSNE_D_ENC : MSA_3RF_FMT<0b1011, 0b1, 0b011100>;
+
+class FSOR_W_ENC : MSA_3RF_FMT<0b1001, 0b0, 0b011100>;
+class FSOR_D_ENC : MSA_3RF_FMT<0b1001, 0b1, 0b011100>;
+
+class FSQRT_W_ENC : MSA_2RF_FMT<0b110010011, 0b0, 0b011110>;
+class FSQRT_D_ENC : MSA_2RF_FMT<0b110010011, 0b1, 0b011110>;
+
+class FSUB_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011011>;
+class FSUB_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011011>;
+
+class FSUEQ_W_ENC : MSA_3RF_FMT<0b1011, 0b0, 0b011010>;
+class FSUEQ_D_ENC : MSA_3RF_FMT<0b1011, 0b1, 0b011010>;
+
+class FSULE_W_ENC : MSA_3RF_FMT<0b1111, 0b0, 0b011010>;
+class FSULE_D_ENC : MSA_3RF_FMT<0b1111, 0b1, 0b011010>;
+
+class FSULT_W_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011010>;
+class FSULT_D_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011010>;
+
+class FSUN_W_ENC : MSA_3RF_FMT<0b1001, 0b0, 0b011010>;
+class FSUN_D_ENC : MSA_3RF_FMT<0b1001, 0b1, 0b011010>;
+
+class FSUNE_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011100>;
+class FSUNE_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011100>;
+
+class FTINT_S_W_ENC : MSA_2RF_FMT<0b110011100, 0b0, 0b011110>;
+class FTINT_S_D_ENC : MSA_2RF_FMT<0b110011100, 0b1, 0b011110>;
+
+class FTINT_U_W_ENC : MSA_2RF_FMT<0b110011101, 0b0, 0b011110>;
+class FTINT_U_D_ENC : MSA_2RF_FMT<0b110011101, 0b1, 0b011110>;
+
+class FTQ_H_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011011>;
+class FTQ_W_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011011>;
+
+class FTRUNC_S_W_ENC : MSA_2RF_FMT<0b110010001, 0b0, 0b011110>;
+class FTRUNC_S_D_ENC : MSA_2RF_FMT<0b110010001, 0b1, 0b011110>;
+
+class FTRUNC_U_W_ENC : MSA_2RF_FMT<0b110010010, 0b0, 0b011110>;
+class FTRUNC_U_D_ENC : MSA_2RF_FMT<0b110010010, 0b1, 0b011110>;
+
+class HADD_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010101>;
+class HADD_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010101>;
+class HADD_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010101>;
+
+class HADD_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010101>;
+class HADD_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010101>;
+class HADD_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010101>;
+
+class HSUB_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010101>;
+class HSUB_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010101>;
+class HSUB_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010101>;
+
+class HSUB_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010101>;
+class HSUB_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010101>;
+class HSUB_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010101>;
+
+class ILVEV_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010100>;
+class ILVEV_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010100>;
+class ILVEV_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010100>;
+class ILVEV_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010100>;
+
+class ILVL_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010100>;
+class ILVL_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010100>;
+class ILVL_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010100>;
+class ILVL_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010100>;
+
+class ILVOD_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010100>;
+class ILVOD_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010100>;
+class ILVOD_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010100>;
+class ILVOD_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010100>;
+
+class ILVR_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010100>;
+class ILVR_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010100>;
+class ILVR_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010100>;
+class ILVR_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010100>;
+
+class INSERT_B_ENC : MSA_ELM_INSERT_B_FMT<0b0100, 0b011001>;
+class INSERT_H_ENC : MSA_ELM_INSERT_H_FMT<0b0100, 0b011001>;
+class INSERT_W_ENC : MSA_ELM_INSERT_W_FMT<0b0100, 0b011001>;
+
+class INSVE_B_ENC : MSA_ELM_B_FMT<0b0101, 0b011001>;
+class INSVE_H_ENC : MSA_ELM_H_FMT<0b0101, 0b011001>;
+class INSVE_W_ENC : MSA_ELM_W_FMT<0b0101, 0b011001>;
+class INSVE_D_ENC : MSA_ELM_D_FMT<0b0101, 0b011001>;
+
+class LD_B_ENC : MSA_MI10_FMT<0b00, 0b1000>;
+class LD_H_ENC : MSA_MI10_FMT<0b01, 0b1000>;
+class LD_W_ENC : MSA_MI10_FMT<0b10, 0b1000>;
+class LD_D_ENC : MSA_MI10_FMT<0b11, 0b1000>;
+
+class LDI_B_ENC : MSA_I10_FMT<0b110, 0b00, 0b000111>;
+class LDI_H_ENC : MSA_I10_FMT<0b110, 0b01, 0b000111>;
+class LDI_W_ENC : MSA_I10_FMT<0b110, 0b10, 0b000111>;
+class LDI_D_ENC : MSA_I10_FMT<0b110, 0b11, 0b000111>;
+
+class LSA_ENC : SPECIAL_LSA_FMT<0b000101>;
+
+class MADD_Q_H_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011100>;
+class MADD_Q_W_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011100>;
+
+class MADDR_Q_H_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011100>;
+class MADDR_Q_W_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011100>;
+
+class MADDV_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010010>;
+class MADDV_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010010>;
+class MADDV_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010010>;
+class MADDV_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010010>;
+
+class MAX_A_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b001110>;
+class MAX_A_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b001110>;
+class MAX_A_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b001110>;
+class MAX_A_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b001110>;
+
+class MAX_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001110>;
+class MAX_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001110>;
+class MAX_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001110>;
+class MAX_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001110>;
+
+class MAX_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001110>;
+class MAX_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001110>;
+class MAX_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001110>;
+class MAX_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001110>;
+
+class MAXI_S_B_ENC : MSA_I5_FMT<0b010, 0b00, 0b000110>;
+class MAXI_S_H_ENC : MSA_I5_FMT<0b010, 0b01, 0b000110>;
+class MAXI_S_W_ENC : MSA_I5_FMT<0b010, 0b10, 0b000110>;
+class MAXI_S_D_ENC : MSA_I5_FMT<0b010, 0b11, 0b000110>;
+
+class MAXI_U_B_ENC : MSA_I5_FMT<0b011, 0b00, 0b000110>;
+class MAXI_U_H_ENC : MSA_I5_FMT<0b011, 0b01, 0b000110>;
+class MAXI_U_W_ENC : MSA_I5_FMT<0b011, 0b10, 0b000110>;
+class MAXI_U_D_ENC : MSA_I5_FMT<0b011, 0b11, 0b000110>;
+
+class MIN_A_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b001110>;
+class MIN_A_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b001110>;
+class MIN_A_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b001110>;
+class MIN_A_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b001110>;
+
+class MIN_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001110>;
+class MIN_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001110>;
+class MIN_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001110>;
+class MIN_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001110>;
+
+class MIN_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001110>;
+class MIN_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001110>;
+class MIN_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001110>;
+class MIN_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001110>;
+
+class MINI_S_B_ENC : MSA_I5_FMT<0b100, 0b00, 0b000110>;
+class MINI_S_H_ENC : MSA_I5_FMT<0b100, 0b01, 0b000110>;
+class MINI_S_W_ENC : MSA_I5_FMT<0b100, 0b10, 0b000110>;
+class MINI_S_D_ENC : MSA_I5_FMT<0b100, 0b11, 0b000110>;
+
+class MINI_U_B_ENC : MSA_I5_FMT<0b101, 0b00, 0b000110>;
+class MINI_U_H_ENC : MSA_I5_FMT<0b101, 0b01, 0b000110>;
+class MINI_U_W_ENC : MSA_I5_FMT<0b101, 0b10, 0b000110>;
+class MINI_U_D_ENC : MSA_I5_FMT<0b101, 0b11, 0b000110>;
+
+class MOD_S_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010010>;
+class MOD_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010010>;
+class MOD_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010010>;
+class MOD_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010010>;
+
+class MOD_U_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010010>;
+class MOD_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010010>;
+class MOD_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010010>;
+class MOD_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010010>;
+
+class MOVE_V_ENC : MSA_ELM_FMT<0b0010111110, 0b011001>;
+
+class MSUB_Q_H_ENC : MSA_3RF_FMT<0b0110, 0b0, 0b011100>;
+class MSUB_Q_W_ENC : MSA_3RF_FMT<0b0110, 0b1, 0b011100>;
+
+class MSUBR_Q_H_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011100>;
+class MSUBR_Q_W_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011100>;
+
+class MSUBV_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010010>;
+class MSUBV_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010010>;
+class MSUBV_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010010>;
+class MSUBV_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010010>;
+
+class MUL_Q_H_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011100>;
+class MUL_Q_W_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011100>;
+
+class MULR_Q_H_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011100>;
+class MULR_Q_W_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011100>;
+
+class MULV_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010010>;
+class MULV_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010010>;
+class MULV_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010010>;
+class MULV_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010010>;
+
+class NLOC_B_ENC : MSA_2R_FMT<0b11000010, 0b00, 0b011110>;
+class NLOC_H_ENC : MSA_2R_FMT<0b11000010, 0b01, 0b011110>;
+class NLOC_W_ENC : MSA_2R_FMT<0b11000010, 0b10, 0b011110>;
+class NLOC_D_ENC : MSA_2R_FMT<0b11000010, 0b11, 0b011110>;
+
+class NLZC_B_ENC : MSA_2R_FMT<0b11000011, 0b00, 0b011110>;
+class NLZC_H_ENC : MSA_2R_FMT<0b11000011, 0b01, 0b011110>;
+class NLZC_W_ENC : MSA_2R_FMT<0b11000011, 0b10, 0b011110>;
+class NLZC_D_ENC : MSA_2R_FMT<0b11000011, 0b11, 0b011110>;
+
+class NOR_V_ENC : MSA_VEC_FMT<0b00010, 0b011110>;
+
+class NORI_B_ENC : MSA_I8_FMT<0b10, 0b000000>;
+
+class OR_V_ENC : MSA_VEC_FMT<0b00001, 0b011110>;
+
+class ORI_B_ENC : MSA_I8_FMT<0b01, 0b000000>;
+
+class PCKEV_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010100>;
+class PCKEV_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010100>;
+class PCKEV_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010100>;
+class PCKEV_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010100>;
+
+class PCKOD_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010100>;
+class PCKOD_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010100>;
+class PCKOD_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010100>;
+class PCKOD_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010100>;
+
+class PCNT_B_ENC : MSA_2R_FMT<0b11000001, 0b00, 0b011110>;
+class PCNT_H_ENC : MSA_2R_FMT<0b11000001, 0b01, 0b011110>;
+class PCNT_W_ENC : MSA_2R_FMT<0b11000001, 0b10, 0b011110>;
+class PCNT_D_ENC : MSA_2R_FMT<0b11000001, 0b11, 0b011110>;
+
+class SAT_S_B_ENC : MSA_BIT_B_FMT<0b000, 0b001010>;
+class SAT_S_H_ENC : MSA_BIT_H_FMT<0b000, 0b001010>;
+class SAT_S_W_ENC : MSA_BIT_W_FMT<0b000, 0b001010>;
+class SAT_S_D_ENC : MSA_BIT_D_FMT<0b000, 0b001010>;
+
+class SAT_U_B_ENC : MSA_BIT_B_FMT<0b001, 0b001010>;
+class SAT_U_H_ENC : MSA_BIT_H_FMT<0b001, 0b001010>;
+class SAT_U_W_ENC : MSA_BIT_W_FMT<0b001, 0b001010>;
+class SAT_U_D_ENC : MSA_BIT_D_FMT<0b001, 0b001010>;
+
+class SHF_B_ENC : MSA_I8_FMT<0b00, 0b000010>;
+class SHF_H_ENC : MSA_I8_FMT<0b01, 0b000010>;
+class SHF_W_ENC : MSA_I8_FMT<0b10, 0b000010>;
+
+class SLD_B_ENC : MSA_3R_INDEX_FMT<0b000, 0b00, 0b010100>;
+class SLD_H_ENC : MSA_3R_INDEX_FMT<0b000, 0b01, 0b010100>;
+class SLD_W_ENC : MSA_3R_INDEX_FMT<0b000, 0b10, 0b010100>;
+class SLD_D_ENC : MSA_3R_INDEX_FMT<0b000, 0b11, 0b010100>;
+
+class SLDI_B_ENC : MSA_ELM_B_FMT<0b0000, 0b011001>;
+class SLDI_H_ENC : MSA_ELM_H_FMT<0b0000, 0b011001>;
+class SLDI_W_ENC : MSA_ELM_W_FMT<0b0000, 0b011001>;
+class SLDI_D_ENC : MSA_ELM_D_FMT<0b0000, 0b011001>;
+
+class SLL_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001101>;
+class SLL_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001101>;
+class SLL_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001101>;
+class SLL_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001101>;
+
+class SLLI_B_ENC : MSA_BIT_B_FMT<0b000, 0b001001>;
+class SLLI_H_ENC : MSA_BIT_H_FMT<0b000, 0b001001>;
+class SLLI_W_ENC : MSA_BIT_W_FMT<0b000, 0b001001>;
+class SLLI_D_ENC : MSA_BIT_D_FMT<0b000, 0b001001>;
+
+class SPLAT_B_ENC : MSA_3R_INDEX_FMT<0b001, 0b00, 0b010100>;
+class SPLAT_H_ENC : MSA_3R_INDEX_FMT<0b001, 0b01, 0b010100>;
+class SPLAT_W_ENC : MSA_3R_INDEX_FMT<0b001, 0b10, 0b010100>;
+class SPLAT_D_ENC : MSA_3R_INDEX_FMT<0b001, 0b11, 0b010100>;
+
+class SPLATI_B_ENC : MSA_ELM_B_FMT<0b0001, 0b011001>;
+class SPLATI_H_ENC : MSA_ELM_H_FMT<0b0001, 0b011001>;
+class SPLATI_W_ENC : MSA_ELM_W_FMT<0b0001, 0b011001>;
+class SPLATI_D_ENC : MSA_ELM_D_FMT<0b0001, 0b011001>;
+
+class SRA_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b001101>;
+class SRA_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b001101>;
+class SRA_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b001101>;
+class SRA_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b001101>;
+
+class SRAI_B_ENC : MSA_BIT_B_FMT<0b001, 0b001001>;
+class SRAI_H_ENC : MSA_BIT_H_FMT<0b001, 0b001001>;
+class SRAI_W_ENC : MSA_BIT_W_FMT<0b001, 0b001001>;
+class SRAI_D_ENC : MSA_BIT_D_FMT<0b001, 0b001001>;
+
+class SRAR_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010101>;
+class SRAR_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010101>;
+class SRAR_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010101>;
+class SRAR_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010101>;
+
+class SRARI_B_ENC : MSA_BIT_B_FMT<0b010, 0b001010>;
+class SRARI_H_ENC : MSA_BIT_H_FMT<0b010, 0b001010>;
+class SRARI_W_ENC : MSA_BIT_W_FMT<0b010, 0b001010>;
+class SRARI_D_ENC : MSA_BIT_D_FMT<0b010, 0b001010>;
+
+class SRL_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001101>;
+class SRL_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001101>;
+class SRL_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001101>;
+class SRL_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001101>;
+
+class SRLI_B_ENC : MSA_BIT_B_FMT<0b010, 0b001001>;
+class SRLI_H_ENC : MSA_BIT_H_FMT<0b010, 0b001001>;
+class SRLI_W_ENC : MSA_BIT_W_FMT<0b010, 0b001001>;
+class SRLI_D_ENC : MSA_BIT_D_FMT<0b010, 0b001001>;
+
+class SRLR_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010101>;
+class SRLR_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010101>;
+class SRLR_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010101>;
+class SRLR_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010101>;
+
+class SRLRI_B_ENC : MSA_BIT_B_FMT<0b011, 0b001010>;
+class SRLRI_H_ENC : MSA_BIT_H_FMT<0b011, 0b001010>;
+class SRLRI_W_ENC : MSA_BIT_W_FMT<0b011, 0b001010>;
+class SRLRI_D_ENC : MSA_BIT_D_FMT<0b011, 0b001010>;
+
+class ST_B_ENC : MSA_MI10_FMT<0b00, 0b1001>;
+class ST_H_ENC : MSA_MI10_FMT<0b01, 0b1001>;
+class ST_W_ENC : MSA_MI10_FMT<0b10, 0b1001>;
+class ST_D_ENC : MSA_MI10_FMT<0b11, 0b1001>;
+
+class SUBS_S_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010001>;
+class SUBS_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010001>;
+class SUBS_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010001>;
+class SUBS_S_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010001>;
+
+class SUBS_U_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010001>;
+class SUBS_U_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010001>;
+class SUBS_U_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010001>;
+class SUBS_U_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010001>;
+
+class SUBSUS_U_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010001>;
+class SUBSUS_U_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010001>;
+class SUBSUS_U_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010001>;
+class SUBSUS_U_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010001>;
+
+class SUBSUU_S_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010001>;
+class SUBSUU_S_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010001>;
+class SUBSUU_S_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010001>;
+class SUBSUU_S_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010001>;
+
+class SUBV_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b001110>;
+class SUBV_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b001110>;
+class SUBV_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b001110>;
+class SUBV_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b001110>;
+
+class SUBVI_B_ENC : MSA_I5_FMT<0b001, 0b00, 0b000110>;
+class SUBVI_H_ENC : MSA_I5_FMT<0b001, 0b01, 0b000110>;
+class SUBVI_W_ENC : MSA_I5_FMT<0b001, 0b10, 0b000110>;
+class SUBVI_D_ENC : MSA_I5_FMT<0b001, 0b11, 0b000110>;
+
+class VSHF_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010101>;
+class VSHF_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010101>;
+class VSHF_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010101>;
+class VSHF_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010101>;
+
+class XOR_V_ENC : MSA_VEC_FMT<0b00011, 0b011110>;
+
+class XORI_B_ENC : MSA_I8_FMT<0b11, 0b000000>;
+
+// Instruction desc.
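+// Each *_DESC_BASE class supplies the operand lists, assembly string,
+// selection pattern and itinerary shared by a family of instructions; the
+// *_ENC classes above supply only the encoding bits.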
+class MSA_BIT_B_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ComplexPattern Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, vsplat_uimm3:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_H_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ComplexPattern Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, vsplat_uimm4:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ComplexPattern Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, vsplat_uimm5:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ComplexPattern Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, vsplat_uimm6:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_BIT_B_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm3:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_BIT_H_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm4:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_BIT_W_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm5:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_BIT_D_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm6:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_BINSXI_DESC_BASE<string instr_asm, ValueType Ty,
+ ComplexPattern Mask, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, vsplat_uimm8:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (vselect (Ty Mask:$m), (Ty ROWD:$wd_in),
+ ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_BIT_BINSLI_DESC_BASE<string instr_asm, ValueType Ty,
+ RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MSA_BIT_BINSXI_DESC_BASE<instr_asm, Ty, vsplat_maskl_bits, ROWD, ROWS, itin>;
+
+class MSA_BIT_BINSRI_DESC_BASE<string instr_asm, ValueType Ty,
+ RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MSA_BIT_BINSXI_DESC_BASE<instr_asm, Ty, vsplat_maskr_bits, ROWD, ROWS, itin>;
+
+class MSA_BIT_SPLAT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SplatComplexPattern SplatImm,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, SplatImm.OpClass:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, SplatImm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_COPY_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ValueType VecTy, RegisterOperand ROD,
+ RegisterOperand ROWS,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROWS:$ws, uimm4:$n);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $ws[$n]");
+ list<dag> Pattern = [(set ROD:$rd, (OpNode (VecTy ROWS:$ws), immZExt4:$n))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_ELM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm4:$n);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$n))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
+ RegisterClass RCD, RegisterClass RCWS> :
+ MipsPseudo<(outs RCD:$wd), (ins RCWS:$ws, uimm4:$n),
+ [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), immZExt4:$n))]> {
+ bit usesCustomInserter = 1;
+}
+
+class MSA_I5_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SplatComplexPattern SplatImm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, SplatImm.OpClass:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $imm");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, SplatImm:$imm))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_I8_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SplatComplexPattern SplatImm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, SplatImm.OpClass:$u8);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, SplatImm:$u8))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed in the next few patches
+class MSA_I8_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm8:$u8);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt8:$u8))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm8:$u8);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+ list<dag> Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_I10_LDI_DESC_BASE<string instr_asm, RegisterOperand ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins vsplat_simm10:$s10);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $s10");
+ // LDI is matched using custom matching code in MipsSEISelDAGToDAG.cpp
+ list<dag> Pattern = [];
+ bit hasSideEffects = 0;
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_2R_FILL_DESC_BASE<string instr_asm, ValueType VT,
+ SDPatternOperator OpNode, RegisterOperand ROWD,
+ RegisterOperand ROS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROS:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $rs");
+ list<dag> Pattern = [(set ROWD:$wd, (VT (OpNode ROS:$rs)))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_2R_FILL_PSEUDO_BASE<ValueType VT, SDPatternOperator OpNode,
+ RegisterClass RCWD, RegisterClass RCWS = RCWD> :
+ MipsPseudo<(outs RCWD:$wd), (ins RCWS:$fs),
+ [(set RCWD:$wd, (OpNode RCWS:$fs))]> {
+ let usesCustomInserter = 1;
+}
+
+class MSA_2RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_BINSX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws,
+ ROWT:$wt))];
+ string Constraints = "$wd = $wd_in";
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_SPLAT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, GPR32:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$rt]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, GPR32:$rt))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_VSHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd, (MipsVSHF ROWD:$wd_in, ROWS:$ws,
+ ROWT:$wt))];
+ string Constraints = "$wd = $wd_in";
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_SLD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, GPR32:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$rt]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, GPR32:$rt))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_4R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd,
+ (OpNode ROWD:$wd_in, ROWS:$ws, ROWT:$wt))];
+ InstrItinClass Itinerary = itin;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_3RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MSA_3R_DESC_BASE<instr_asm, OpNode, ROWD, ROWS, ROWT, itin>;
+
+class MSA_3RF_4RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MSA_3R_4R_DESC_BASE<instr_asm, OpNode, ROWD, ROWS, ROWT, itin>;
+
+class MSA_CBRANCH_DESC_BASE<string instr_asm, RegisterOperand ROWD> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins ROWD:$wt, brtarget:$offset);
+ string AsmString = !strconcat(instr_asm, "\t$wt, $offset");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = IIBranch;
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit hasDelaySlot = 1;
+ list<Register> Defs = [AT];
+}
+
+class MSA_INSERT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROS,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROS:$rs, uimm6:$n);
+ string AsmString = !strconcat(instr_asm, "\t$wd[$n], $rs");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in,
+ ROS:$rs,
+ immZExt6:$n))];
+ InstrItinClass Itinerary = itin;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_INSERT_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+ RegisterOperand ROWD, RegisterOperand ROFS> :
+ MipsPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, uimm6:$n, ROFS:$fs),
+ [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs,
+ immZExt6:$n))]> {
+ bit usesCustomInserter = 1;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_INSVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, uimm6:$n, ROWS:$ws);
+ string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[0]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in,
+ immZExt6:$n,
+ ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_VEC_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_ELM_SPLAT_DESC_BASE<string instr_asm, SplatComplexPattern SplatImm,
+ RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, SplatImm.OpClass:$n);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]");
+ list<dag> Pattern = [(set ROWD:$wd, (MipsVSHF SplatImm:$n, ROWS:$ws,
+ ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_VEC_PSEUDO_BASE<SDPatternOperator OpNode, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD> :
+ MipsPseudo<(outs ROWD:$wd), (ins ROWS:$ws, ROWT:$wt),
+ [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))]>;
+
+class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, MSA128BOpnd>,
+ IsCommutable;
+class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, MSA128HOpnd>,
+ IsCommutable;
+class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, MSA128WOpnd>,
+ IsCommutable;
+class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, MSA128DOpnd>,
+ IsCommutable;
+
+class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b,
+ MSA128BOpnd>, IsCommutable;
+class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h,
+ MSA128HOpnd>, IsCommutable;
+class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w,
+ MSA128WOpnd>, IsCommutable;
+class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d,
+ MSA128DOpnd>, IsCommutable;
+
+class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b,
+ MSA128BOpnd>, IsCommutable;
+class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h,
+ MSA128HOpnd>, IsCommutable;
+class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w,
+ MSA128WOpnd>, IsCommutable;
+class ADDS_S_D_DESC : MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d,
+ MSA128DOpnd>, IsCommutable;
+
+class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b", int_mips_adds_u_b,
+ MSA128BOpnd>, IsCommutable;
+class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h,
+ MSA128HOpnd>, IsCommutable;
+class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w,
+ MSA128WOpnd>, IsCommutable;
+class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d,
+ MSA128DOpnd>, IsCommutable;
+
+class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", add, MSA128BOpnd>, IsCommutable;
+class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", add, MSA128HOpnd>, IsCommutable;
+class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", add, MSA128WOpnd>, IsCommutable;
+class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", add, MSA128DOpnd>, IsCommutable;
+
+class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", add, vsplati8_uimm5,
+ MSA128BOpnd>;
+class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", add, vsplati16_uimm5,
+ MSA128HOpnd>;
+class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", add, vsplati32_uimm5,
+ MSA128WOpnd>;
+class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", add, vsplati64_uimm5,
+ MSA128DOpnd>;
+
+class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", and, MSA128BOpnd>;
+class AND_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<and, MSA128HOpnd>;
+class AND_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<and, MSA128WOpnd>;
+class AND_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<and, MSA128DOpnd>;
+
+class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", and, vsplati8_uimm8,
+ MSA128BOpnd>;
+
+class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b,
+ MSA128BOpnd>;
+class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h,
+ MSA128HOpnd>;
+class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w,
+ MSA128WOpnd>;
+class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d,
+ MSA128DOpnd>;
+
+class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b,
+ MSA128BOpnd>;
+class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h,
+ MSA128HOpnd>;
+class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w,
+ MSA128WOpnd>;
+class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d,
+ MSA128DOpnd>;
+
+class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, MSA128BOpnd>,
+ IsCommutable;
+class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, MSA128HOpnd>,
+ IsCommutable;
+class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, MSA128WOpnd>,
+ IsCommutable;
+class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, MSA128DOpnd>,
+ IsCommutable;
+
+class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, MSA128BOpnd>,
+ IsCommutable;
+class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, MSA128HOpnd>,
+ IsCommutable;
+class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, MSA128WOpnd>,
+ IsCommutable;
+class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, MSA128DOpnd>,
+ IsCommutable;
+
+class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b,
+ MSA128BOpnd>, IsCommutable;
+class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h,
+ MSA128HOpnd>, IsCommutable;
+class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w,
+ MSA128WOpnd>, IsCommutable;
+class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d,
+ MSA128DOpnd>, IsCommutable;
+
+class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b,
+ MSA128BOpnd>, IsCommutable;
+class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h,
+ MSA128HOpnd>, IsCommutable;
+class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w,
+ MSA128WOpnd>, IsCommutable;
+class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d,
+ MSA128DOpnd>, IsCommutable;
+
+class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", vbclr_b, MSA128BOpnd>;
+class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", vbclr_h, MSA128HOpnd>;
+class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", vbclr_w, MSA128WOpnd>;
+class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", vbclr_d, MSA128DOpnd>;
+
+class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", and, vsplat_uimm_inv_pow2,
+ MSA128BOpnd>;
+class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", and, vsplat_uimm_inv_pow2,
+ MSA128HOpnd>;
+class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", and, vsplat_uimm_inv_pow2,
+ MSA128WOpnd>;
+class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", and, vsplat_uimm_inv_pow2,
+ MSA128DOpnd>;
+
+class BINSL_B_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.b", int_mips_binsl_b,
+ MSA128BOpnd>;
+class BINSL_H_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.h", int_mips_binsl_h,
+ MSA128HOpnd>;
+class BINSL_W_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.w", int_mips_binsl_w,
+ MSA128WOpnd>;
+class BINSL_D_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.d", int_mips_binsl_d,
+ MSA128DOpnd>;
+
+class BINSLI_B_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.b", v16i8, MSA128BOpnd>;
+class BINSLI_H_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.h", v8i16, MSA128HOpnd>;
+class BINSLI_W_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.w", v4i32, MSA128WOpnd>;
+class BINSLI_D_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.d", v2i64, MSA128DOpnd>;
+
+class BINSR_B_DESC : MSA_3R_BINSX_DESC_BASE<"binsr.b", int_mips_binsr_b,
+ MSA128BOpnd>;
+class BINSR_H_DESC : MSA_3R_BINSX_DESC_BASE<"binsr.h", int_mips_binsr_h,
+ MSA128HOpnd>;
+class BINSR_W_DESC : MSA_3R_BINSX_DESC_BASE<"binsr.w", int_mips_binsr_w,
+ MSA128WOpnd>;
+class BINSR_D_DESC : MSA_3R_BINSX_DESC_BASE<"binsr.d", int_mips_binsr_d,
+ MSA128DOpnd>;
+
+class BINSRI_B_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.b", v16i8, MSA128BOpnd>;
+class BINSRI_H_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.h", v8i16, MSA128HOpnd>;
+class BINSRI_W_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.w", v4i32, MSA128WOpnd>;
+class BINSRI_D_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.d", v2i64, MSA128DOpnd>;
+
+class BMNZ_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt);
+ string AsmString = "bmnz.v\t$wd, $ws, $wt";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect MSA128BOpnd:$wt,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wd_in))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BMNZI_B_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ vsplat_uimm8:$u8);
+ string AsmString = "bmnzi.b\t$wd, $ws, $u8";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect vsplati8_uimm8:$u8,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wd_in))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BMZ_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt);
+ string AsmString = "bmz.v\t$wd, $ws, $wt";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect MSA128BOpnd:$wt,
+ MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BMZI_B_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ vsplat_uimm8:$u8);
+ string AsmString = "bmzi.b\t$wd, $ws, $u8";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect vsplati8_uimm8:$u8,
+ MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", vbneg_b, MSA128BOpnd>;
+class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", vbneg_h, MSA128HOpnd>;
+class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", vbneg_w, MSA128WOpnd>;
+class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", vbneg_d, MSA128DOpnd>;
+
+class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", xor, vsplat_uimm_pow2,
+ MSA128BOpnd>;
+class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", xor, vsplat_uimm_pow2,
+ MSA128HOpnd>;
+class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", xor, vsplat_uimm_pow2,
+ MSA128WOpnd>;
+class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", xor, vsplat_uimm_pow2,
+ MSA128DOpnd>;
+
+class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128BOpnd>;
+class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128HOpnd>;
+class BNZ_W_DESC : MSA_CBRANCH_DESC_BASE<"bnz.w", MSA128WOpnd>;
+class BNZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bnz.d", MSA128DOpnd>;
+
+class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128BOpnd>;
+
+class BSEL_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt);
+ string AsmString = "bsel.v\t$wd, $ws, $wt";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd,
+ (vselect MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BSELI_B_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ vsplat_uimm8:$u8);
+ string AsmString = "bseli.b\t$wd, $ws, $u8";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws,
+ vsplati8_uimm8:$u8))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", vbset_b, MSA128BOpnd>;
+class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", vbset_h, MSA128HOpnd>;
+class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", vbset_w, MSA128WOpnd>;
+class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", vbset_d, MSA128DOpnd>;
+
+class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", or, vsplat_uimm_pow2,
+ MSA128BOpnd>;
+class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", or, vsplat_uimm_pow2,
+ MSA128HOpnd>;
+class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", or, vsplat_uimm_pow2,
+ MSA128WOpnd>;
+class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", or, vsplat_uimm_pow2,
+ MSA128DOpnd>;
+
+class BZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bz.b", MSA128BOpnd>;
+class BZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bz.h", MSA128HOpnd>;
+class BZ_W_DESC : MSA_CBRANCH_DESC_BASE<"bz.w", MSA128WOpnd>;
+class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128DOpnd>;
+
+class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128BOpnd>;
+
+class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", vseteq_v16i8, MSA128BOpnd>,
+ IsCommutable;
+class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", vseteq_v8i16, MSA128HOpnd>,
+ IsCommutable;
+class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", vseteq_v4i32, MSA128WOpnd>,
+ IsCommutable;
+class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", vseteq_v2i64, MSA128DOpnd>,
+ IsCommutable;
+
+class CEQI_B_DESC : MSA_I5_DESC_BASE<"ceqi.b", vseteq_v16i8, vsplati8_simm5,
+ MSA128BOpnd>;
+class CEQI_H_DESC : MSA_I5_DESC_BASE<"ceqi.h", vseteq_v8i16, vsplati16_simm5,
+ MSA128HOpnd>;
+class CEQI_W_DESC : MSA_I5_DESC_BASE<"ceqi.w", vseteq_v4i32, vsplati32_simm5,
+ MSA128WOpnd>;
+class CEQI_D_DESC : MSA_I5_DESC_BASE<"ceqi.d", vseteq_v2i64, vsplati64_simm5,
+ MSA128DOpnd>;
+
+class CFCMSA_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins MSA128CROpnd:$cs);
+ string AsmString = "cfcmsa\t$rd, $cs";
+ InstrItinClass Itinerary = NoItinerary;
+ bit hasSideEffects = 1;
+}
+
+class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", vsetle_v16i8, MSA128BOpnd>;
+class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", vsetle_v8i16, MSA128HOpnd>;
+class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", vsetle_v4i32, MSA128WOpnd>;
+class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", vsetle_v2i64, MSA128DOpnd>;
+
+class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", vsetule_v16i8, MSA128BOpnd>;
+class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", vsetule_v8i16, MSA128HOpnd>;
+class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", vsetule_v4i32, MSA128WOpnd>;
+class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", vsetule_v2i64, MSA128DOpnd>;
+
+class CLEI_S_B_DESC : MSA_I5_DESC_BASE<"clei_s.b", vsetle_v16i8,
+ vsplati8_simm5, MSA128BOpnd>;
+class CLEI_S_H_DESC : MSA_I5_DESC_BASE<"clei_s.h", vsetle_v8i16,
+ vsplati16_simm5, MSA128HOpnd>;
+class CLEI_S_W_DESC : MSA_I5_DESC_BASE<"clei_s.w", vsetle_v4i32,
+ vsplati32_simm5, MSA128WOpnd>;
+class CLEI_S_D_DESC : MSA_I5_DESC_BASE<"clei_s.d", vsetle_v2i64,
+ vsplati64_simm5, MSA128DOpnd>;
+
+class CLEI_U_B_DESC : MSA_I5_DESC_BASE<"clei_u.b", vsetule_v16i8,
+ vsplati8_uimm5, MSA128BOpnd>;
+class CLEI_U_H_DESC : MSA_I5_DESC_BASE<"clei_u.h", vsetule_v8i16,
+ vsplati16_uimm5, MSA128HOpnd>;
+class CLEI_U_W_DESC : MSA_I5_DESC_BASE<"clei_u.w", vsetule_v4i32,
+ vsplati32_uimm5, MSA128WOpnd>;
+class CLEI_U_D_DESC : MSA_I5_DESC_BASE<"clei_u.d", vsetule_v2i64,
+ vsplati64_uimm5, MSA128DOpnd>;
+
+class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", vsetlt_v16i8, MSA128BOpnd>;
+class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", vsetlt_v8i16, MSA128HOpnd>;
+class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", vsetlt_v4i32, MSA128WOpnd>;
+class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", vsetlt_v2i64, MSA128DOpnd>;
+
+class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", vsetult_v16i8, MSA128BOpnd>;
+class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", vsetult_v8i16, MSA128HOpnd>;
+class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", vsetult_v4i32, MSA128WOpnd>;
+class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", vsetult_v2i64, MSA128DOpnd>;
+
+class CLTI_S_B_DESC : MSA_I5_DESC_BASE<"clti_s.b", vsetlt_v16i8,
+ vsplati8_simm5, MSA128BOpnd>;
+class CLTI_S_H_DESC : MSA_I5_DESC_BASE<"clti_s.h", vsetlt_v8i16,
+ vsplati16_simm5, MSA128HOpnd>;
+class CLTI_S_W_DESC : MSA_I5_DESC_BASE<"clti_s.w", vsetlt_v4i32,
+ vsplati32_simm5, MSA128WOpnd>;
+class CLTI_S_D_DESC : MSA_I5_DESC_BASE<"clti_s.d", vsetlt_v2i64,
+ vsplati64_simm5, MSA128DOpnd>;
+
+class CLTI_U_B_DESC : MSA_I5_DESC_BASE<"clti_u.b", vsetult_v16i8,
+ vsplati8_uimm5, MSA128BOpnd>;
+class CLTI_U_H_DESC : MSA_I5_DESC_BASE<"clti_u.h", vsetult_v8i16,
+ vsplati16_uimm5, MSA128HOpnd>;
+class CLTI_U_W_DESC : MSA_I5_DESC_BASE<"clti_u.w", vsetult_v4i32,
+ vsplati32_uimm5, MSA128WOpnd>;
+class CLTI_U_D_DESC : MSA_I5_DESC_BASE<"clti_u.d", vsetult_v2i64,
+ vsplati64_uimm5, MSA128DOpnd>;
+
+class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", vextract_sext_i8, v16i8,
+ GPR32Opnd, MSA128BOpnd>;
+class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", vextract_sext_i16, v8i16,
+ GPR32Opnd, MSA128HOpnd>;
+class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", vextract_sext_i32, v4i32,
+ GPR32Opnd, MSA128WOpnd>;
+
+class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", vextract_zext_i8, v16i8,
+ GPR32Opnd, MSA128BOpnd>;
+class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16,
+ GPR32Opnd, MSA128HOpnd>;
+class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32,
+ GPR32Opnd, MSA128WOpnd>;
+
+class COPY_FW_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE<vector_extract, v4f32, FGR32,
+ MSA128W>;
+class COPY_FD_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE<vector_extract, v2f64, FGR64,
+ MSA128D>;
+
+class CTCMSA_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins MSA128CROpnd:$cd, GPR32Opnd:$rs);
+ string AsmString = "ctcmsa\t$cd, $rs";
+ InstrItinClass Itinerary = NoItinerary;
+ bit hasSideEffects = 1;
+}
+
+class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", sdiv, MSA128BOpnd>;
+class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", sdiv, MSA128HOpnd>;
+class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", sdiv, MSA128WOpnd>;
+class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", sdiv, MSA128DOpnd>;
+
+class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", udiv, MSA128BOpnd>;
+class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", udiv, MSA128HOpnd>;
+class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", udiv, MSA128WOpnd>;
+class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", udiv, MSA128DOpnd>;
+
+class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>,
+ IsCommutable;
+class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>,
+ IsCommutable;
+class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>,
+ IsCommutable;
+
+class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>,
+ IsCommutable;
+class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>,
+ IsCommutable;
+class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>,
+ IsCommutable;
+
+class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h,
+ MSA128HOpnd, MSA128BOpnd,
+ MSA128BOpnd>, IsCommutable;
+class DPADD_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.w", int_mips_dpadd_s_w,
+ MSA128WOpnd, MSA128HOpnd,
+ MSA128HOpnd>, IsCommutable;
+class DPADD_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.d", int_mips_dpadd_s_d,
+ MSA128DOpnd, MSA128WOpnd,
+ MSA128WOpnd>, IsCommutable;
+
+class DPADD_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.h", int_mips_dpadd_u_h,
+ MSA128HOpnd, MSA128BOpnd,
+ MSA128BOpnd>, IsCommutable;
+class DPADD_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.w", int_mips_dpadd_u_w,
+ MSA128WOpnd, MSA128HOpnd,
+ MSA128HOpnd>, IsCommutable;
+class DPADD_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.d", int_mips_dpadd_u_d,
+ MSA128DOpnd, MSA128WOpnd,
+ MSA128WOpnd>, IsCommutable;
+
+class DPSUB_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.h", int_mips_dpsub_s_h,
+ MSA128HOpnd, MSA128BOpnd,
+ MSA128BOpnd>;
+class DPSUB_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.w", int_mips_dpsub_s_w,
+ MSA128WOpnd, MSA128HOpnd,
+ MSA128HOpnd>;
+class DPSUB_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.d", int_mips_dpsub_s_d,
+ MSA128DOpnd, MSA128WOpnd,
+ MSA128WOpnd>;
+
+class DPSUB_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.h", int_mips_dpsub_u_h,
+ MSA128HOpnd, MSA128BOpnd,
+ MSA128BOpnd>;
+class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w,
+ MSA128WOpnd, MSA128HOpnd,
+ MSA128HOpnd>;
+class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d,
+ MSA128DOpnd, MSA128WOpnd,
+ MSA128WOpnd>;
+
+class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", fadd, MSA128WOpnd>,
+ IsCommutable;
+class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", fadd, MSA128DOpnd>,
+ IsCommutable;
+
+class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, MSA128WOpnd>,
+ IsCommutable;
+class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, MSA128DOpnd>,
+ IsCommutable;
+
+class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", vfsetoeq_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", vfsetoeq_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w,
+ MSA128WOpnd>;
+class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d,
+ MSA128DOpnd>;
+
+class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", vfsetole_v4f32, MSA128WOpnd>;
+class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", vfsetole_v2f64, MSA128DOpnd>;
+
+class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", vfsetolt_v4f32, MSA128WOpnd>;
+class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", vfsetolt_v2f64, MSA128DOpnd>;
+
+class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", vfsetone_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", vfsetone_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", vfsetord_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", vfsetord_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", vfsetueq_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", vfsetueq_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", vfsetule_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", vfsetule_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", vfsetult_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", vfsetult_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", vfsetun_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", vfsetun_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", vfsetune_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", vfsetune_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", fdiv, MSA128WOpnd>;
+class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", fdiv, MSA128DOpnd>;
+
+class FEXDO_H_DESC : MSA_3RF_DESC_BASE<"fexdo.h", int_mips_fexdo_h,
+ MSA128HOpnd, MSA128WOpnd, MSA128WOpnd>;
+class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w,
+ MSA128WOpnd, MSA128DOpnd, MSA128DOpnd>;
+
+// The fexp2.df instruction multiplies its first operand by 2 raised to the
+// power of its second operand. We therefore need a pseudo-instruction to
+// supply the implicit 1.0 first operand when all we have to match is
+// ISD::FEXP2.
+class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", mul_fexp2, MSA128WOpnd>;
+class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", mul_fexp2, MSA128DOpnd>;
+let usesCustomInserter = 1 in {
+ class FEXP2_W_1_PSEUDO_DESC :
+ MipsPseudo<(outs MSA128W:$wd), (ins MSA128W:$ws),
+ [(set MSA128W:$wd, (fexp2 MSA128W:$ws))]>;
+ class FEXP2_D_1_PSEUDO_DESC :
+ MipsPseudo<(outs MSA128D:$wd), (ins MSA128D:$ws),
+ [(set MSA128D:$wd, (fexp2 MSA128D:$ws))]>;
+}
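+// A sketch of the expansion the custom inserter is assumed to perform for
+// these pseudos (the real expansion lives in the C++ lowering code, not in
+// this file; register names below are purely illustrative):
+//   ldi.w     $w1, 1          # splat the integer 1 into every element
+//   ffint_u.w $w1, $w1        # convert the splat to floating-point 1.0
+//   fexp2.w   $wd, $w1, $ws   # 1.0 * 2^$ws yields the required 2^$ws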
+
+class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w,
+ MSA128WOpnd, MSA128HOpnd>;
+class FEXUPL_D_DESC : MSA_2RF_DESC_BASE<"fexupl.d", int_mips_fexupl_d,
+ MSA128DOpnd, MSA128WOpnd>;
+
+class FEXUPR_W_DESC : MSA_2RF_DESC_BASE<"fexupr.w", int_mips_fexupr_w,
+ MSA128WOpnd, MSA128HOpnd>;
+class FEXUPR_D_DESC : MSA_2RF_DESC_BASE<"fexupr.d", int_mips_fexupr_d,
+ MSA128DOpnd, MSA128WOpnd>;
+
+class FFINT_S_W_DESC : MSA_2RF_DESC_BASE<"ffint_s.w", sint_to_fp, MSA128WOpnd>;
+class FFINT_S_D_DESC : MSA_2RF_DESC_BASE<"ffint_s.d", sint_to_fp, MSA128DOpnd>;
+
+class FFINT_U_W_DESC : MSA_2RF_DESC_BASE<"ffint_u.w", uint_to_fp, MSA128WOpnd>;
+class FFINT_U_D_DESC : MSA_2RF_DESC_BASE<"ffint_u.d", uint_to_fp, MSA128DOpnd>;
+
+class FFQL_W_DESC : MSA_2RF_DESC_BASE<"ffql.w", int_mips_ffql_w,
+ MSA128WOpnd, MSA128HOpnd>;
+class FFQL_D_DESC : MSA_2RF_DESC_BASE<"ffql.d", int_mips_ffql_d,
+ MSA128DOpnd, MSA128WOpnd>;
+
+class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w,
+ MSA128WOpnd, MSA128HOpnd>;
+class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d,
+ MSA128DOpnd, MSA128WOpnd>;
+
+class FILL_B_DESC : MSA_2R_FILL_DESC_BASE<"fill.b", v16i8, vsplati8,
+ MSA128BOpnd, GPR32Opnd>;
+class FILL_H_DESC : MSA_2R_FILL_DESC_BASE<"fill.h", v8i16, vsplati16,
+ MSA128HOpnd, GPR32Opnd>;
+class FILL_W_DESC : MSA_2R_FILL_DESC_BASE<"fill.w", v4i32, vsplati32,
+ MSA128WOpnd, GPR32Opnd>;
+
+class FILL_FW_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<v4f32, vsplatf32, MSA128W,
+ FGR32>;
+class FILL_FD_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<v2f64, vsplatf64, MSA128D,
+ FGR64>;
+
+class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128WOpnd>;
+class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128DOpnd>;
+
+class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", fma, MSA128WOpnd>;
+class FMADD_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.d", fma, MSA128DOpnd>;
+
+class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, MSA128WOpnd>;
+class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, MSA128DOpnd>;
+
+class FMAX_A_W_DESC : MSA_3RF_DESC_BASE<"fmax_a.w", int_mips_fmax_a_w,
+ MSA128WOpnd>;
+class FMAX_A_D_DESC : MSA_3RF_DESC_BASE<"fmax_a.d", int_mips_fmax_a_d,
+ MSA128DOpnd>;
+
+class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, MSA128WOpnd>;
+class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, MSA128DOpnd>;
+
+class FMIN_A_W_DESC : MSA_3RF_DESC_BASE<"fmin_a.w", int_mips_fmin_a_w,
+ MSA128WOpnd>;
+class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d,
+ MSA128DOpnd>;
+
+class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", fms, MSA128WOpnd>;
+class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", fms, MSA128DOpnd>;
+
+class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", fmul, MSA128WOpnd>;
+class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", fmul, MSA128DOpnd>;
+
+class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", frint, MSA128WOpnd>;
+class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", frint, MSA128DOpnd>;
+
+class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, MSA128WOpnd>;
+class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, MSA128DOpnd>;
+
+class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w,
+ MSA128WOpnd>;
+class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d,
+ MSA128DOpnd>;
+
+class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, MSA128WOpnd>;
+class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, MSA128DOpnd>;
+
+class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, MSA128WOpnd>;
+class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, MSA128DOpnd>;
+
+class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, MSA128WOpnd>;
+class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, MSA128DOpnd>;
+
+class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, MSA128WOpnd>;
+class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, MSA128DOpnd>;
+
+class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, MSA128WOpnd>;
+class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, MSA128DOpnd>;
+
+class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, MSA128WOpnd>;
+class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, MSA128DOpnd>;
+
+class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", fsqrt, MSA128WOpnd>;
+class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", fsqrt, MSA128DOpnd>;
+
+class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", fsub, MSA128WOpnd>;
+class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", fsub, MSA128DOpnd>;
+
+class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w,
+ MSA128WOpnd>;
+class FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d,
+ MSA128DOpnd>;
+
+class FSULE_W_DESC : MSA_3RF_DESC_BASE<"fsule.w", int_mips_fsule_w,
+ MSA128WOpnd>;
+class FSULE_D_DESC : MSA_3RF_DESC_BASE<"fsule.d", int_mips_fsule_d,
+ MSA128DOpnd>;
+
+class FSULT_W_DESC : MSA_3RF_DESC_BASE<"fsult.w", int_mips_fsult_w,
+ MSA128WOpnd>;
+class FSULT_D_DESC : MSA_3RF_DESC_BASE<"fsult.d", int_mips_fsult_d,
+ MSA128DOpnd>;
+
+class FSUN_W_DESC : MSA_3RF_DESC_BASE<"fsun.w", int_mips_fsun_w,
+ MSA128WOpnd>;
+class FSUN_D_DESC : MSA_3RF_DESC_BASE<"fsun.d", int_mips_fsun_d,
+ MSA128DOpnd>;
+
+class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w,
+ MSA128WOpnd>;
+class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d,
+ MSA128DOpnd>;
+
+class FTINT_S_W_DESC : MSA_2RF_DESC_BASE<"ftint_s.w", int_mips_ftint_s_w,
+ MSA128WOpnd>;
+class FTINT_S_D_DESC : MSA_2RF_DESC_BASE<"ftint_s.d", int_mips_ftint_s_d,
+ MSA128DOpnd>;
+
+class FTINT_U_W_DESC : MSA_2RF_DESC_BASE<"ftint_u.w", int_mips_ftint_u_w,
+ MSA128WOpnd>;
+class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d,
+ MSA128DOpnd>;
+
+class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h,
+ MSA128HOpnd, MSA128WOpnd, MSA128WOpnd>;
+class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w,
+ MSA128WOpnd, MSA128DOpnd, MSA128DOpnd>;
+
+class FTRUNC_S_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.w", fp_to_sint,
+ MSA128WOpnd>;
+class FTRUNC_S_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.d", fp_to_sint,
+ MSA128DOpnd>;
+
+class FTRUNC_U_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.w", fp_to_uint,
+ MSA128WOpnd>;
+class FTRUNC_U_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.d", fp_to_uint,
+ MSA128DOpnd>;
+
+class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>;
+class HADD_S_W_DESC : MSA_3R_DESC_BASE<"hadd_s.w", int_mips_hadd_s_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>;
+class HADD_S_D_DESC : MSA_3R_DESC_BASE<"hadd_s.d", int_mips_hadd_s_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>;
+
+class HADD_U_H_DESC : MSA_3R_DESC_BASE<"hadd_u.h", int_mips_hadd_u_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>;
+class HADD_U_W_DESC : MSA_3R_DESC_BASE<"hadd_u.w", int_mips_hadd_u_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>;
+class HADD_U_D_DESC : MSA_3R_DESC_BASE<"hadd_u.d", int_mips_hadd_u_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>;
+
+class HSUB_S_H_DESC : MSA_3R_DESC_BASE<"hsub_s.h", int_mips_hsub_s_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>;
+class HSUB_S_W_DESC : MSA_3R_DESC_BASE<"hsub_s.w", int_mips_hsub_s_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>;
+class HSUB_S_D_DESC : MSA_3R_DESC_BASE<"hsub_s.d", int_mips_hsub_s_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>;
+
+class HSUB_U_H_DESC : MSA_3R_DESC_BASE<"hsub_u.h", int_mips_hsub_u_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>;
+class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>;
+class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>;
+
+class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", MipsILVEV, MSA128BOpnd>;
+class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", MipsILVEV, MSA128HOpnd>;
+class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", MipsILVEV, MSA128WOpnd>;
+class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", MipsILVEV, MSA128DOpnd>;
+
+class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", MipsILVL, MSA128BOpnd>;
+class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", MipsILVL, MSA128HOpnd>;
+class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", MipsILVL, MSA128WOpnd>;
+class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", MipsILVL, MSA128DOpnd>;
+
+class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", MipsILVOD, MSA128BOpnd>;
+class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", MipsILVOD, MSA128HOpnd>;
+class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", MipsILVOD, MSA128WOpnd>;
+class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", MipsILVOD, MSA128DOpnd>;
+
+class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", MipsILVR, MSA128BOpnd>;
+class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", MipsILVR, MSA128HOpnd>;
+class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", MipsILVR, MSA128WOpnd>;
+class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", MipsILVR, MSA128DOpnd>;
+
+class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", vinsert_v16i8,
+ MSA128BOpnd, GPR32Opnd>;
+class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", vinsert_v8i16,
+ MSA128HOpnd, GPR32Opnd>;
+class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", vinsert_v4i32,
+ MSA128WOpnd, GPR32Opnd>;
+
+class INSERT_FW_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE<vector_insert, v4f32,
+ MSA128WOpnd, FGR32Opnd>;
+class INSERT_FD_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE<vector_insert, v2f64,
+ MSA128DOpnd, FGR64Opnd>;
+
+class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b,
+ MSA128BOpnd>;
+class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h,
+ MSA128HOpnd>;
+class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", int_mips_insve_w,
+ MSA128WOpnd>;
+class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d,
+ MSA128DOpnd>;
+
+class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ValueType TyNode, RegisterOperand ROWD,
+ Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins MemOpnd:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
+ list<dag> Pattern = [(set ROWD:$wd, (TyNode (OpNode Addr:$addr)))];
+ InstrItinClass Itinerary = itin;
+ string DecoderMethod = "DecodeMSA128Mem";
+}
+
+class LD_B_DESC : LD_DESC_BASE<"ld.b", load, v16i8, MSA128BOpnd>;
+class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128HOpnd>;
+class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128WOpnd>;
+class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128DOpnd>;
+
+class LDI_B_DESC : MSA_I10_LDI_DESC_BASE<"ldi.b", MSA128BOpnd>;
+class LDI_H_DESC : MSA_I10_LDI_DESC_BASE<"ldi.h", MSA128HOpnd>;
+class LDI_W_DESC : MSA_I10_LDI_DESC_BASE<"ldi.w", MSA128WOpnd>;
+class LDI_D_DESC : MSA_I10_LDI_DESC_BASE<"ldi.d", MSA128DOpnd>;
+
+class LSA_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, LSAImm:$sa);
+ string AsmString = "lsa\t$rd, $rs, $rt, $sa";
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (add GPR32Opnd:$rs,
+ (shl GPR32Opnd:$rt,
+ immZExt2Lsa:$sa)))];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h,
+ MSA128HOpnd>;
+class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w,
+ MSA128WOpnd>;
+
+class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h,
+ MSA128HOpnd>;
+class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w,
+ MSA128WOpnd>;
+
+class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", muladd, MSA128BOpnd>;
+class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", muladd, MSA128HOpnd>;
+class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", muladd, MSA128WOpnd>;
+class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", muladd, MSA128DOpnd>;
+
+class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, MSA128BOpnd>;
+class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, MSA128HOpnd>;
+class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, MSA128WOpnd>;
+class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, MSA128DOpnd>;
+
+class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", MipsVSMax, MSA128BOpnd>;
+class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", MipsVSMax, MSA128HOpnd>;
+class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", MipsVSMax, MSA128WOpnd>;
+class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", MipsVSMax, MSA128DOpnd>;
+
+class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", MipsVUMax, MSA128BOpnd>;
+class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", MipsVUMax, MSA128HOpnd>;
+class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", MipsVUMax, MSA128WOpnd>;
+class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", MipsVUMax, MSA128DOpnd>;
+
+class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8_simm5,
+ MSA128BOpnd>;
+class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", MipsVSMax, vsplati16_simm5,
+ MSA128HOpnd>;
+class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", MipsVSMax, vsplati32_simm5,
+ MSA128WOpnd>;
+class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", MipsVSMax, vsplati64_simm5,
+ MSA128DOpnd>;
+
+class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", MipsVUMax, vsplati8_uimm5,
+ MSA128BOpnd>;
+class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", MipsVUMax, vsplati16_uimm5,
+ MSA128HOpnd>;
+class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32_uimm5,
+ MSA128WOpnd>;
+class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64_uimm5,
+ MSA128DOpnd>;
+
+class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128BOpnd>;
+class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128HOpnd>;
+class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, MSA128WOpnd>;
+class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, MSA128DOpnd>;
+
+class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", MipsVSMin, MSA128BOpnd>;
+class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", MipsVSMin, MSA128HOpnd>;
+class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", MipsVSMin, MSA128WOpnd>;
+class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", MipsVSMin, MSA128DOpnd>;
+
+class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", MipsVUMin, MSA128BOpnd>;
+class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", MipsVUMin, MSA128HOpnd>;
+class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", MipsVUMin, MSA128WOpnd>;
+class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", MipsVUMin, MSA128DOpnd>;
+
+class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8_simm5,
+ MSA128BOpnd>;
+class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", MipsVSMin, vsplati16_simm5,
+ MSA128HOpnd>;
+class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", MipsVSMin, vsplati32_simm5,
+ MSA128WOpnd>;
+class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", MipsVSMin, vsplati64_simm5,
+ MSA128DOpnd>;
+
+class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", MipsVUMin, vsplati8_uimm5,
+ MSA128BOpnd>;
+class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", MipsVUMin, vsplati16_uimm5,
+ MSA128HOpnd>;
+class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32_uimm5,
+ MSA128WOpnd>;
+class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64_uimm5,
+ MSA128DOpnd>;
+
+class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", srem, MSA128BOpnd>;
+class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", srem, MSA128HOpnd>;
+class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", srem, MSA128WOpnd>;
+class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", srem, MSA128DOpnd>;
+
+class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", urem, MSA128BOpnd>;
+class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", urem, MSA128HOpnd>;
+class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", urem, MSA128WOpnd>;
+class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", urem, MSA128DOpnd>;
+
+class MOVE_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$ws);
+ string AsmString = "move.v\t$wd, $ws";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+class MSUB_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.h", int_mips_msub_q_h,
+ MSA128HOpnd>;
+class MSUB_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.w", int_mips_msub_q_w,
+ MSA128WOpnd>;
+
+class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h,
+ MSA128HOpnd>;
+class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w,
+ MSA128WOpnd>;
+
+class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", mulsub, MSA128BOpnd>;
+class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", mulsub, MSA128HOpnd>;
+class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", mulsub, MSA128WOpnd>;
+class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", mulsub, MSA128DOpnd>;
+
+class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h,
+ MSA128HOpnd>;
+class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w,
+ MSA128WOpnd>;
+
+class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h,
+ MSA128HOpnd>;
+class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w,
+ MSA128WOpnd>;
+
+class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", mul, MSA128BOpnd>;
+class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", mul, MSA128HOpnd>;
+class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", mul, MSA128WOpnd>;
+class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", mul, MSA128DOpnd>;
+
+class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, MSA128BOpnd>;
+class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, MSA128HOpnd>;
+class NLOC_W_DESC : MSA_2R_DESC_BASE<"nloc.w", int_mips_nloc_w, MSA128WOpnd>;
+class NLOC_D_DESC : MSA_2R_DESC_BASE<"nloc.d", int_mips_nloc_d, MSA128DOpnd>;
+
+class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", ctlz, MSA128BOpnd>;
+class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", ctlz, MSA128HOpnd>;
+class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", ctlz, MSA128WOpnd>;
+class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", ctlz, MSA128DOpnd>;
+
+class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", MipsVNOR, MSA128BOpnd>;
+class NOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128HOpnd>;
+class NOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128WOpnd>;
+class NOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128DOpnd>;
+
+class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", MipsVNOR, vsplati8_uimm8,
+ MSA128BOpnd>;
+
+class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", or, MSA128BOpnd>;
+class OR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<or, MSA128HOpnd>;
+class OR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<or, MSA128WOpnd>;
+class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<or, MSA128DOpnd>;
+
+class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8_uimm8, MSA128BOpnd>;
+
+class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", MipsPCKEV, MSA128BOpnd>;
+class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", MipsPCKEV, MSA128HOpnd>;
+class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", MipsPCKEV, MSA128WOpnd>;
+class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", MipsPCKEV, MSA128DOpnd>;
+
+class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", MipsPCKOD, MSA128BOpnd>;
+class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", MipsPCKOD, MSA128HOpnd>;
+class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", MipsPCKOD, MSA128WOpnd>;
+class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", MipsPCKOD, MSA128DOpnd>;
+
+class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", ctpop, MSA128BOpnd>;
+class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128HOpnd>;
+class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>;
+class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
+
+class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b,
+ MSA128BOpnd>;
+class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h,
+ MSA128HOpnd>;
+class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w,
+ MSA128WOpnd>;
+class SAT_S_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d,
+ MSA128DOpnd>;
+
+class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b,
+ MSA128BOpnd>;
+class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h,
+ MSA128HOpnd>;
+class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w,
+ MSA128WOpnd>;
+class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d,
+ MSA128DOpnd>;
+
+class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
+class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
+class SHF_W_DESC : MSA_I8_SHF_DESC_BASE<"shf.w", MSA128WOpnd>;
+
+class SLD_B_DESC : MSA_3R_SLD_DESC_BASE<"sld.b", int_mips_sld_b, MSA128BOpnd>;
+class SLD_H_DESC : MSA_3R_SLD_DESC_BASE<"sld.h", int_mips_sld_h, MSA128HOpnd>;
+class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>;
+class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
+
+class SLDI_B_DESC : MSA_ELM_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128BOpnd>;
+class SLDI_H_DESC : MSA_ELM_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128HOpnd>;
+class SLDI_W_DESC : MSA_ELM_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128WOpnd>;
+class SLDI_D_DESC : MSA_ELM_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128DOpnd>;
+
+class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>;
+class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>;
+class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128WOpnd>;
+class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128DOpnd>;
+
+class SLLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.b", shl, vsplati8_uimm3,
+ MSA128BOpnd>;
+class SLLI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.h", shl, vsplati16_uimm4,
+ MSA128HOpnd>;
+class SLLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.w", shl, vsplati32_uimm5,
+ MSA128WOpnd>;
+class SLLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.d", shl, vsplati64_uimm6,
+ MSA128DOpnd>;
+
+class SPLAT_B_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.b", vsplati8_elt,
+ MSA128BOpnd>;
+class SPLAT_H_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.h", vsplati16_elt,
+ MSA128HOpnd>;
+class SPLAT_W_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.w", vsplati32_elt,
+ MSA128WOpnd>;
+class SPLAT_D_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.d", vsplati64_elt,
+ MSA128DOpnd>;
+
+class SPLATI_B_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.b", vsplati8_uimm4,
+ MSA128BOpnd>;
+class SPLATI_H_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.h", vsplati16_uimm3,
+ MSA128HOpnd>;
+class SPLATI_W_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.w", vsplati32_uimm2,
+ MSA128WOpnd>;
+class SPLATI_D_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.d", vsplati64_uimm1,
+ MSA128DOpnd>;
+
+class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", sra, MSA128BOpnd>;
+class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128HOpnd>;
+class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128WOpnd>;
+class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128DOpnd>;
+
+class SRAI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.b", sra, vsplati8_uimm3,
+ MSA128BOpnd>;
+class SRAI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.h", sra, vsplati16_uimm4,
+ MSA128HOpnd>;
+class SRAI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.w", sra, vsplati32_uimm5,
+ MSA128WOpnd>;
+class SRAI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.d", sra, vsplati64_uimm6,
+ MSA128DOpnd>;
+
+class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128BOpnd>;
+class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>;
+class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>;
+class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>;
+
+class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b,
+ MSA128BOpnd>;
+class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h,
+ MSA128HOpnd>;
+class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w,
+ MSA128WOpnd>;
+class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d,
+ MSA128DOpnd>;
+
+class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>;
+class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>;
+class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128WOpnd>;
+class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128DOpnd>;
+
+class SRLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.b", srl, vsplati8_uimm3,
+ MSA128BOpnd>;
+class SRLI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.h", srl, vsplati16_uimm4,
+ MSA128HOpnd>;
+class SRLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.w", srl, vsplati32_uimm5,
+ MSA128WOpnd>;
+class SRLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.d", srl, vsplati64_uimm6,
+ MSA128DOpnd>;
+
+class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128BOpnd>;
+class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>;
+class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>;
+class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>;
+
+class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b,
+ MSA128BOpnd>;
+class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h,
+ MSA128HOpnd>;
+class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w,
+ MSA128WOpnd>;
+class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d,
+ MSA128DOpnd>;
+
+class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ValueType TyNode, RegisterOperand ROWD,
+ Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins ROWD:$wd, MemOpnd:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
+ list<dag> Pattern = [(OpNode (TyNode ROWD:$wd), Addr:$addr)];
+ InstrItinClass Itinerary = itin;
+ string DecoderMethod = "DecodeMSA128Mem";
+}
+
+class ST_B_DESC : ST_DESC_BASE<"st.b", store, v16i8, MSA128BOpnd>;
+class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, MSA128HOpnd>;
+class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, MSA128WOpnd>;
+class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, MSA128DOpnd>;
+
+class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b,
+ MSA128BOpnd>;
+class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h,
+ MSA128HOpnd>;
+class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w,
+ MSA128WOpnd>;
+class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d,
+ MSA128DOpnd>;
+
+class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b,
+ MSA128BOpnd>;
+class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h,
+ MSA128HOpnd>;
+class SUBS_U_W_DESC : MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w,
+ MSA128WOpnd>;
+class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d,
+ MSA128DOpnd>;
+
+class SUBSUS_U_B_DESC : MSA_3R_DESC_BASE<"subsus_u.b", int_mips_subsus_u_b,
+ MSA128BOpnd>;
+class SUBSUS_U_H_DESC : MSA_3R_DESC_BASE<"subsus_u.h", int_mips_subsus_u_h,
+ MSA128HOpnd>;
+class SUBSUS_U_W_DESC : MSA_3R_DESC_BASE<"subsus_u.w", int_mips_subsus_u_w,
+ MSA128WOpnd>;
+class SUBSUS_U_D_DESC : MSA_3R_DESC_BASE<"subsus_u.d", int_mips_subsus_u_d,
+ MSA128DOpnd>;
+
+class SUBSUU_S_B_DESC : MSA_3R_DESC_BASE<"subsuu_s.b", int_mips_subsuu_s_b,
+ MSA128BOpnd>;
+class SUBSUU_S_H_DESC : MSA_3R_DESC_BASE<"subsuu_s.h", int_mips_subsuu_s_h,
+ MSA128HOpnd>;
+class SUBSUU_S_W_DESC : MSA_3R_DESC_BASE<"subsuu_s.w", int_mips_subsuu_s_w,
+ MSA128WOpnd>;
+class SUBSUU_S_D_DESC : MSA_3R_DESC_BASE<"subsuu_s.d", int_mips_subsuu_s_d,
+ MSA128DOpnd>;
+
+class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", sub, MSA128BOpnd>;
+class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", sub, MSA128HOpnd>;
+class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", sub, MSA128WOpnd>;
+class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", sub, MSA128DOpnd>;
+
+class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", sub, vsplati8_uimm5,
+ MSA128BOpnd>;
+class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", sub, vsplati16_uimm5,
+ MSA128HOpnd>;
+class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32_uimm5,
+ MSA128WOpnd>;
+class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64_uimm5,
+ MSA128DOpnd>;
+
+class VSHF_B_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.b", MSA128BOpnd>;
+class VSHF_H_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.h", MSA128HOpnd>;
+class VSHF_W_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.w", MSA128WOpnd>;
+class VSHF_D_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.d", MSA128DOpnd>;
+
+class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", xor, MSA128BOpnd>;
+class XOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<xor, MSA128HOpnd>;
+class XOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<xor, MSA128WOpnd>;
+class XOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<xor, MSA128DOpnd>;
+
+class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", xor, vsplati8_uimm8,
+ MSA128BOpnd>;
+
+// Instruction defs.
+def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC;
+def ADD_A_H : ADD_A_H_ENC, ADD_A_H_DESC;
+def ADD_A_W : ADD_A_W_ENC, ADD_A_W_DESC;
+def ADD_A_D : ADD_A_D_ENC, ADD_A_D_DESC;
+
+def ADDS_A_B : ADDS_A_B_ENC, ADDS_A_B_DESC;
+def ADDS_A_H : ADDS_A_H_ENC, ADDS_A_H_DESC;
+def ADDS_A_W : ADDS_A_W_ENC, ADDS_A_W_DESC;
+def ADDS_A_D : ADDS_A_D_ENC, ADDS_A_D_DESC;
+
+def ADDS_S_B : ADDS_S_B_ENC, ADDS_S_B_DESC;
+def ADDS_S_H : ADDS_S_H_ENC, ADDS_S_H_DESC;
+def ADDS_S_W : ADDS_S_W_ENC, ADDS_S_W_DESC;
+def ADDS_S_D : ADDS_S_D_ENC, ADDS_S_D_DESC;
+
+def ADDS_U_B : ADDS_U_B_ENC, ADDS_U_B_DESC;
+def ADDS_U_H : ADDS_U_H_ENC, ADDS_U_H_DESC;
+def ADDS_U_W : ADDS_U_W_ENC, ADDS_U_W_DESC;
+def ADDS_U_D : ADDS_U_D_ENC, ADDS_U_D_DESC;
+
+def ADDV_B : ADDV_B_ENC, ADDV_B_DESC;
+def ADDV_H : ADDV_H_ENC, ADDV_H_DESC;
+def ADDV_W : ADDV_W_ENC, ADDV_W_DESC;
+def ADDV_D : ADDV_D_ENC, ADDV_D_DESC;
+
+def ADDVI_B : ADDVI_B_ENC, ADDVI_B_DESC;
+def ADDVI_H : ADDVI_H_ENC, ADDVI_H_DESC;
+def ADDVI_W : ADDVI_W_ENC, ADDVI_W_DESC;
+def ADDVI_D : ADDVI_D_ENC, ADDVI_D_DESC;
+
+def AND_V : AND_V_ENC, AND_V_DESC;
+def AND_V_H_PSEUDO : AND_V_H_PSEUDO_DESC,
+ PseudoInstExpansion<(AND_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def AND_V_W_PSEUDO : AND_V_W_PSEUDO_DESC,
+ PseudoInstExpansion<(AND_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def AND_V_D_PSEUDO : AND_V_D_PSEUDO_DESC,
+ PseudoInstExpansion<(AND_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+
+def ANDI_B : ANDI_B_ENC, ANDI_B_DESC;
+
+def ASUB_S_B : ASUB_S_B_ENC, ASUB_S_B_DESC;
+def ASUB_S_H : ASUB_S_H_ENC, ASUB_S_H_DESC;
+def ASUB_S_W : ASUB_S_W_ENC, ASUB_S_W_DESC;
+def ASUB_S_D : ASUB_S_D_ENC, ASUB_S_D_DESC;
+
+def ASUB_U_B : ASUB_U_B_ENC, ASUB_U_B_DESC;
+def ASUB_U_H : ASUB_U_H_ENC, ASUB_U_H_DESC;
+def ASUB_U_W : ASUB_U_W_ENC, ASUB_U_W_DESC;
+def ASUB_U_D : ASUB_U_D_ENC, ASUB_U_D_DESC;
+
+def AVE_S_B : AVE_S_B_ENC, AVE_S_B_DESC;
+def AVE_S_H : AVE_S_H_ENC, AVE_S_H_DESC;
+def AVE_S_W : AVE_S_W_ENC, AVE_S_W_DESC;
+def AVE_S_D : AVE_S_D_ENC, AVE_S_D_DESC;
+
+def AVE_U_B : AVE_U_B_ENC, AVE_U_B_DESC;
+def AVE_U_H : AVE_U_H_ENC, AVE_U_H_DESC;
+def AVE_U_W : AVE_U_W_ENC, AVE_U_W_DESC;
+def AVE_U_D : AVE_U_D_ENC, AVE_U_D_DESC;
+
+def AVER_S_B : AVER_S_B_ENC, AVER_S_B_DESC;
+def AVER_S_H : AVER_S_H_ENC, AVER_S_H_DESC;
+def AVER_S_W : AVER_S_W_ENC, AVER_S_W_DESC;
+def AVER_S_D : AVER_S_D_ENC, AVER_S_D_DESC;
+
+def AVER_U_B : AVER_U_B_ENC, AVER_U_B_DESC;
+def AVER_U_H : AVER_U_H_ENC, AVER_U_H_DESC;
+def AVER_U_W : AVER_U_W_ENC, AVER_U_W_DESC;
+def AVER_U_D : AVER_U_D_ENC, AVER_U_D_DESC;
+
+def BCLR_B : BCLR_B_ENC, BCLR_B_DESC;
+def BCLR_H : BCLR_H_ENC, BCLR_H_DESC;
+def BCLR_W : BCLR_W_ENC, BCLR_W_DESC;
+def BCLR_D : BCLR_D_ENC, BCLR_D_DESC;
+
+def BCLRI_B : BCLRI_B_ENC, BCLRI_B_DESC;
+def BCLRI_H : BCLRI_H_ENC, BCLRI_H_DESC;
+def BCLRI_W : BCLRI_W_ENC, BCLRI_W_DESC;
+def BCLRI_D : BCLRI_D_ENC, BCLRI_D_DESC;
+
+def BINSL_B : BINSL_B_ENC, BINSL_B_DESC;
+def BINSL_H : BINSL_H_ENC, BINSL_H_DESC;
+def BINSL_W : BINSL_W_ENC, BINSL_W_DESC;
+def BINSL_D : BINSL_D_ENC, BINSL_D_DESC;
+
+def BINSLI_B : BINSLI_B_ENC, BINSLI_B_DESC;
+def BINSLI_H : BINSLI_H_ENC, BINSLI_H_DESC;
+def BINSLI_W : BINSLI_W_ENC, BINSLI_W_DESC;
+def BINSLI_D : BINSLI_D_ENC, BINSLI_D_DESC;
+
+def BINSR_B : BINSR_B_ENC, BINSR_B_DESC;
+def BINSR_H : BINSR_H_ENC, BINSR_H_DESC;
+def BINSR_W : BINSR_W_ENC, BINSR_W_DESC;
+def BINSR_D : BINSR_D_ENC, BINSR_D_DESC;
+
+def BINSRI_B : BINSRI_B_ENC, BINSRI_B_DESC;
+def BINSRI_H : BINSRI_H_ENC, BINSRI_H_DESC;
+def BINSRI_W : BINSRI_W_ENC, BINSRI_W_DESC;
+def BINSRI_D : BINSRI_D_ENC, BINSRI_D_DESC;
+
+def BMNZ_V : BMNZ_V_ENC, BMNZ_V_DESC;
+
+def BMNZI_B : BMNZI_B_ENC, BMNZI_B_DESC;
+
+def BMZ_V : BMZ_V_ENC, BMZ_V_DESC;
+
+def BMZI_B : BMZI_B_ENC, BMZI_B_DESC;
+
+def BNEG_B : BNEG_B_ENC, BNEG_B_DESC;
+def BNEG_H : BNEG_H_ENC, BNEG_H_DESC;
+def BNEG_W : BNEG_W_ENC, BNEG_W_DESC;
+def BNEG_D : BNEG_D_ENC, BNEG_D_DESC;
+
+def BNEGI_B : BNEGI_B_ENC, BNEGI_B_DESC;
+def BNEGI_H : BNEGI_H_ENC, BNEGI_H_DESC;
+def BNEGI_W : BNEGI_W_ENC, BNEGI_W_DESC;
+def BNEGI_D : BNEGI_D_ENC, BNEGI_D_DESC;
+
+def BNZ_B : BNZ_B_ENC, BNZ_B_DESC;
+def BNZ_H : BNZ_H_ENC, BNZ_H_DESC;
+def BNZ_W : BNZ_W_ENC, BNZ_W_DESC;
+def BNZ_D : BNZ_D_ENC, BNZ_D_DESC;
+
+def BNZ_V : BNZ_V_ENC, BNZ_V_DESC;
+
+def BSEL_V : BSEL_V_ENC, BSEL_V_DESC;
+
+class MSA_BSEL_PSEUDO_BASE<RegisterOperand RO, ValueType Ty> :
+ MipsPseudo<(outs RO:$wd), (ins RO:$wd_in, RO:$ws, RO:$wt),
+ [(set RO:$wd, (Ty (vselect RO:$wd_in, RO:$ws, RO:$wt)))]>,
+ PseudoInstExpansion<(BSEL_V MSA128BOpnd:$wd, MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws, MSA128BOpnd:$wt)> {
+ let Constraints = "$wd_in = $wd";
+}
+
+def BSEL_H_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128HOpnd, v8i16>;
+def BSEL_W_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128WOpnd, v4i32>;
+def BSEL_D_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128DOpnd, v2i64>;
+def BSEL_FW_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128WOpnd, v4f32>;
+def BSEL_FD_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128DOpnd, v2f64>;
+
+def BSELI_B : BSELI_B_ENC, BSELI_B_DESC;
+
+def BSET_B : BSET_B_ENC, BSET_B_DESC;
+def BSET_H : BSET_H_ENC, BSET_H_DESC;
+def BSET_W : BSET_W_ENC, BSET_W_DESC;
+def BSET_D : BSET_D_ENC, BSET_D_DESC;
+
+def BSETI_B : BSETI_B_ENC, BSETI_B_DESC;
+def BSETI_H : BSETI_H_ENC, BSETI_H_DESC;
+def BSETI_W : BSETI_W_ENC, BSETI_W_DESC;
+def BSETI_D : BSETI_D_ENC, BSETI_D_DESC;
+
+def BZ_B : BZ_B_ENC, BZ_B_DESC;
+def BZ_H : BZ_H_ENC, BZ_H_DESC;
+def BZ_W : BZ_W_ENC, BZ_W_DESC;
+def BZ_D : BZ_D_ENC, BZ_D_DESC;
+
+def BZ_V : BZ_V_ENC, BZ_V_DESC;
+
+def CEQ_B : CEQ_B_ENC, CEQ_B_DESC;
+def CEQ_H : CEQ_H_ENC, CEQ_H_DESC;
+def CEQ_W : CEQ_W_ENC, CEQ_W_DESC;
+def CEQ_D : CEQ_D_ENC, CEQ_D_DESC;
+
+def CEQI_B : CEQI_B_ENC, CEQI_B_DESC;
+def CEQI_H : CEQI_H_ENC, CEQI_H_DESC;
+def CEQI_W : CEQI_W_ENC, CEQI_W_DESC;
+def CEQI_D : CEQI_D_ENC, CEQI_D_DESC;
+
+def CFCMSA : CFCMSA_ENC, CFCMSA_DESC;
+
+def CLE_S_B : CLE_S_B_ENC, CLE_S_B_DESC;
+def CLE_S_H : CLE_S_H_ENC, CLE_S_H_DESC;
+def CLE_S_W : CLE_S_W_ENC, CLE_S_W_DESC;
+def CLE_S_D : CLE_S_D_ENC, CLE_S_D_DESC;
+
+def CLE_U_B : CLE_U_B_ENC, CLE_U_B_DESC;
+def CLE_U_H : CLE_U_H_ENC, CLE_U_H_DESC;
+def CLE_U_W : CLE_U_W_ENC, CLE_U_W_DESC;
+def CLE_U_D : CLE_U_D_ENC, CLE_U_D_DESC;
+
+def CLEI_S_B : CLEI_S_B_ENC, CLEI_S_B_DESC;
+def CLEI_S_H : CLEI_S_H_ENC, CLEI_S_H_DESC;
+def CLEI_S_W : CLEI_S_W_ENC, CLEI_S_W_DESC;
+def CLEI_S_D : CLEI_S_D_ENC, CLEI_S_D_DESC;
+
+def CLEI_U_B : CLEI_U_B_ENC, CLEI_U_B_DESC;
+def CLEI_U_H : CLEI_U_H_ENC, CLEI_U_H_DESC;
+def CLEI_U_W : CLEI_U_W_ENC, CLEI_U_W_DESC;
+def CLEI_U_D : CLEI_U_D_ENC, CLEI_U_D_DESC;
+
+def CLT_S_B : CLT_S_B_ENC, CLT_S_B_DESC;
+def CLT_S_H : CLT_S_H_ENC, CLT_S_H_DESC;
+def CLT_S_W : CLT_S_W_ENC, CLT_S_W_DESC;
+def CLT_S_D : CLT_S_D_ENC, CLT_S_D_DESC;
+
+def CLT_U_B : CLT_U_B_ENC, CLT_U_B_DESC;
+def CLT_U_H : CLT_U_H_ENC, CLT_U_H_DESC;
+def CLT_U_W : CLT_U_W_ENC, CLT_U_W_DESC;
+def CLT_U_D : CLT_U_D_ENC, CLT_U_D_DESC;
+
+def CLTI_S_B : CLTI_S_B_ENC, CLTI_S_B_DESC;
+def CLTI_S_H : CLTI_S_H_ENC, CLTI_S_H_DESC;
+def CLTI_S_W : CLTI_S_W_ENC, CLTI_S_W_DESC;
+def CLTI_S_D : CLTI_S_D_ENC, CLTI_S_D_DESC;
+
+def CLTI_U_B : CLTI_U_B_ENC, CLTI_U_B_DESC;
+def CLTI_U_H : CLTI_U_H_ENC, CLTI_U_H_DESC;
+def CLTI_U_W : CLTI_U_W_ENC, CLTI_U_W_DESC;
+def CLTI_U_D : CLTI_U_D_ENC, CLTI_U_D_DESC;
+
+def COPY_S_B : COPY_S_B_ENC, COPY_S_B_DESC;
+def COPY_S_H : COPY_S_H_ENC, COPY_S_H_DESC;
+def COPY_S_W : COPY_S_W_ENC, COPY_S_W_DESC;
+
+def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC;
+def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC;
+def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC;
+
+def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC;
+def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC;
+
+def CTCMSA : CTCMSA_ENC, CTCMSA_DESC;
+
+def DIV_S_B : DIV_S_B_ENC, DIV_S_B_DESC;
+def DIV_S_H : DIV_S_H_ENC, DIV_S_H_DESC;
+def DIV_S_W : DIV_S_W_ENC, DIV_S_W_DESC;
+def DIV_S_D : DIV_S_D_ENC, DIV_S_D_DESC;
+
+def DIV_U_B : DIV_U_B_ENC, DIV_U_B_DESC;
+def DIV_U_H : DIV_U_H_ENC, DIV_U_H_DESC;
+def DIV_U_W : DIV_U_W_ENC, DIV_U_W_DESC;
+def DIV_U_D : DIV_U_D_ENC, DIV_U_D_DESC;
+
+def DOTP_S_H : DOTP_S_H_ENC, DOTP_S_H_DESC;
+def DOTP_S_W : DOTP_S_W_ENC, DOTP_S_W_DESC;
+def DOTP_S_D : DOTP_S_D_ENC, DOTP_S_D_DESC;
+
+def DOTP_U_H : DOTP_U_H_ENC, DOTP_U_H_DESC;
+def DOTP_U_W : DOTP_U_W_ENC, DOTP_U_W_DESC;
+def DOTP_U_D : DOTP_U_D_ENC, DOTP_U_D_DESC;
+
+def DPADD_S_H : DPADD_S_H_ENC, DPADD_S_H_DESC;
+def DPADD_S_W : DPADD_S_W_ENC, DPADD_S_W_DESC;
+def DPADD_S_D : DPADD_S_D_ENC, DPADD_S_D_DESC;
+
+def DPADD_U_H : DPADD_U_H_ENC, DPADD_U_H_DESC;
+def DPADD_U_W : DPADD_U_W_ENC, DPADD_U_W_DESC;
+def DPADD_U_D : DPADD_U_D_ENC, DPADD_U_D_DESC;
+
+def DPSUB_S_H : DPSUB_S_H_ENC, DPSUB_S_H_DESC;
+def DPSUB_S_W : DPSUB_S_W_ENC, DPSUB_S_W_DESC;
+def DPSUB_S_D : DPSUB_S_D_ENC, DPSUB_S_D_DESC;
+
+def DPSUB_U_H : DPSUB_U_H_ENC, DPSUB_U_H_DESC;
+def DPSUB_U_W : DPSUB_U_W_ENC, DPSUB_U_W_DESC;
+def DPSUB_U_D : DPSUB_U_D_ENC, DPSUB_U_D_DESC;
+
+def FADD_W : FADD_W_ENC, FADD_W_DESC;
+def FADD_D : FADD_D_ENC, FADD_D_DESC;
+
+def FCAF_W : FCAF_W_ENC, FCAF_W_DESC;
+def FCAF_D : FCAF_D_ENC, FCAF_D_DESC;
+
+def FCEQ_W : FCEQ_W_ENC, FCEQ_W_DESC;
+def FCEQ_D : FCEQ_D_ENC, FCEQ_D_DESC;
+
+def FCLE_W : FCLE_W_ENC, FCLE_W_DESC;
+def FCLE_D : FCLE_D_ENC, FCLE_D_DESC;
+
+def FCLT_W : FCLT_W_ENC, FCLT_W_DESC;
+def FCLT_D : FCLT_D_ENC, FCLT_D_DESC;
+
+def FCLASS_W : FCLASS_W_ENC, FCLASS_W_DESC;
+def FCLASS_D : FCLASS_D_ENC, FCLASS_D_DESC;
+
+def FCNE_W : FCNE_W_ENC, FCNE_W_DESC;
+def FCNE_D : FCNE_D_ENC, FCNE_D_DESC;
+
+def FCOR_W : FCOR_W_ENC, FCOR_W_DESC;
+def FCOR_D : FCOR_D_ENC, FCOR_D_DESC;
+
+def FCUEQ_W : FCUEQ_W_ENC, FCUEQ_W_DESC;
+def FCUEQ_D : FCUEQ_D_ENC, FCUEQ_D_DESC;
+
+def FCULE_W : FCULE_W_ENC, FCULE_W_DESC;
+def FCULE_D : FCULE_D_ENC, FCULE_D_DESC;
+
+def FCULT_W : FCULT_W_ENC, FCULT_W_DESC;
+def FCULT_D : FCULT_D_ENC, FCULT_D_DESC;
+
+def FCUN_W : FCUN_W_ENC, FCUN_W_DESC;
+def FCUN_D : FCUN_D_ENC, FCUN_D_DESC;
+
+def FCUNE_W : FCUNE_W_ENC, FCUNE_W_DESC;
+def FCUNE_D : FCUNE_D_ENC, FCUNE_D_DESC;
+
+def FDIV_W : FDIV_W_ENC, FDIV_W_DESC;
+def FDIV_D : FDIV_D_ENC, FDIV_D_DESC;
+
+def FEXDO_H : FEXDO_H_ENC, FEXDO_H_DESC;
+def FEXDO_W : FEXDO_W_ENC, FEXDO_W_DESC;
+
+def FEXP2_W : FEXP2_W_ENC, FEXP2_W_DESC;
+def FEXP2_D : FEXP2_D_ENC, FEXP2_D_DESC;
+def FEXP2_W_1_PSEUDO : FEXP2_W_1_PSEUDO_DESC;
+def FEXP2_D_1_PSEUDO : FEXP2_D_1_PSEUDO_DESC;
+
+def FEXUPL_W : FEXUPL_W_ENC, FEXUPL_W_DESC;
+def FEXUPL_D : FEXUPL_D_ENC, FEXUPL_D_DESC;
+
+def FEXUPR_W : FEXUPR_W_ENC, FEXUPR_W_DESC;
+def FEXUPR_D : FEXUPR_D_ENC, FEXUPR_D_DESC;
+
+def FFINT_S_W : FFINT_S_W_ENC, FFINT_S_W_DESC;
+def FFINT_S_D : FFINT_S_D_ENC, FFINT_S_D_DESC;
+
+def FFINT_U_W : FFINT_U_W_ENC, FFINT_U_W_DESC;
+def FFINT_U_D : FFINT_U_D_ENC, FFINT_U_D_DESC;
+
+def FFQL_W : FFQL_W_ENC, FFQL_W_DESC;
+def FFQL_D : FFQL_D_ENC, FFQL_D_DESC;
+
+def FFQR_W : FFQR_W_ENC, FFQR_W_DESC;
+def FFQR_D : FFQR_D_ENC, FFQR_D_DESC;
+
+def FILL_B : FILL_B_ENC, FILL_B_DESC;
+def FILL_H : FILL_H_ENC, FILL_H_DESC;
+def FILL_W : FILL_W_ENC, FILL_W_DESC;
+def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC;
+def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC;
+
+def FLOG2_W : FLOG2_W_ENC, FLOG2_W_DESC;
+def FLOG2_D : FLOG2_D_ENC, FLOG2_D_DESC;
+
+def FMADD_W : FMADD_W_ENC, FMADD_W_DESC;
+def FMADD_D : FMADD_D_ENC, FMADD_D_DESC;
+
+def FMAX_W : FMAX_W_ENC, FMAX_W_DESC;
+def FMAX_D : FMAX_D_ENC, FMAX_D_DESC;
+
+def FMAX_A_W : FMAX_A_W_ENC, FMAX_A_W_DESC;
+def FMAX_A_D : FMAX_A_D_ENC, FMAX_A_D_DESC;
+
+def FMIN_W : FMIN_W_ENC, FMIN_W_DESC;
+def FMIN_D : FMIN_D_ENC, FMIN_D_DESC;
+
+def FMIN_A_W : FMIN_A_W_ENC, FMIN_A_W_DESC;
+def FMIN_A_D : FMIN_A_D_ENC, FMIN_A_D_DESC;
+
+def FMSUB_W : FMSUB_W_ENC, FMSUB_W_DESC;
+def FMSUB_D : FMSUB_D_ENC, FMSUB_D_DESC;
+
+def FMUL_W : FMUL_W_ENC, FMUL_W_DESC;
+def FMUL_D : FMUL_D_ENC, FMUL_D_DESC;
+
+def FRINT_W : FRINT_W_ENC, FRINT_W_DESC;
+def FRINT_D : FRINT_D_ENC, FRINT_D_DESC;
+
+def FRCP_W : FRCP_W_ENC, FRCP_W_DESC;
+def FRCP_D : FRCP_D_ENC, FRCP_D_DESC;
+
+def FRSQRT_W : FRSQRT_W_ENC, FRSQRT_W_DESC;
+def FRSQRT_D : FRSQRT_D_ENC, FRSQRT_D_DESC;
+
+def FSAF_W : FSAF_W_ENC, FSAF_W_DESC;
+def FSAF_D : FSAF_D_ENC, FSAF_D_DESC;
+
+def FSEQ_W : FSEQ_W_ENC, FSEQ_W_DESC;
+def FSEQ_D : FSEQ_D_ENC, FSEQ_D_DESC;
+
+def FSLE_W : FSLE_W_ENC, FSLE_W_DESC;
+def FSLE_D : FSLE_D_ENC, FSLE_D_DESC;
+
+def FSLT_W : FSLT_W_ENC, FSLT_W_DESC;
+def FSLT_D : FSLT_D_ENC, FSLT_D_DESC;
+
+def FSNE_W : FSNE_W_ENC, FSNE_W_DESC;
+def FSNE_D : FSNE_D_ENC, FSNE_D_DESC;
+
+def FSOR_W : FSOR_W_ENC, FSOR_W_DESC;
+def FSOR_D : FSOR_D_ENC, FSOR_D_DESC;
+
+def FSQRT_W : FSQRT_W_ENC, FSQRT_W_DESC;
+def FSQRT_D : FSQRT_D_ENC, FSQRT_D_DESC;
+
+def FSUB_W : FSUB_W_ENC, FSUB_W_DESC;
+def FSUB_D : FSUB_D_ENC, FSUB_D_DESC;
+
+def FSUEQ_W : FSUEQ_W_ENC, FSUEQ_W_DESC;
+def FSUEQ_D : FSUEQ_D_ENC, FSUEQ_D_DESC;
+
+def FSULE_W : FSULE_W_ENC, FSULE_W_DESC;
+def FSULE_D : FSULE_D_ENC, FSULE_D_DESC;
+
+def FSULT_W : FSULT_W_ENC, FSULT_W_DESC;
+def FSULT_D : FSULT_D_ENC, FSULT_D_DESC;
+
+def FSUN_W : FSUN_W_ENC, FSUN_W_DESC;
+def FSUN_D : FSUN_D_ENC, FSUN_D_DESC;
+
+def FSUNE_W : FSUNE_W_ENC, FSUNE_W_DESC;
+def FSUNE_D : FSUNE_D_ENC, FSUNE_D_DESC;
+
+def FTINT_S_W : FTINT_S_W_ENC, FTINT_S_W_DESC;
+def FTINT_S_D : FTINT_S_D_ENC, FTINT_S_D_DESC;
+
+def FTINT_U_W : FTINT_U_W_ENC, FTINT_U_W_DESC;
+def FTINT_U_D : FTINT_U_D_ENC, FTINT_U_D_DESC;
+
+def FTQ_H : FTQ_H_ENC, FTQ_H_DESC;
+def FTQ_W : FTQ_W_ENC, FTQ_W_DESC;
+
+def FTRUNC_S_W : FTRUNC_S_W_ENC, FTRUNC_S_W_DESC;
+def FTRUNC_S_D : FTRUNC_S_D_ENC, FTRUNC_S_D_DESC;
+
+def FTRUNC_U_W : FTRUNC_U_W_ENC, FTRUNC_U_W_DESC;
+def FTRUNC_U_D : FTRUNC_U_D_ENC, FTRUNC_U_D_DESC;
+
+def HADD_S_H : HADD_S_H_ENC, HADD_S_H_DESC;
+def HADD_S_W : HADD_S_W_ENC, HADD_S_W_DESC;
+def HADD_S_D : HADD_S_D_ENC, HADD_S_D_DESC;
+
+def HADD_U_H : HADD_U_H_ENC, HADD_U_H_DESC;
+def HADD_U_W : HADD_U_W_ENC, HADD_U_W_DESC;
+def HADD_U_D : HADD_U_D_ENC, HADD_U_D_DESC;
+
+def HSUB_S_H : HSUB_S_H_ENC, HSUB_S_H_DESC;
+def HSUB_S_W : HSUB_S_W_ENC, HSUB_S_W_DESC;
+def HSUB_S_D : HSUB_S_D_ENC, HSUB_S_D_DESC;
+
+def HSUB_U_H : HSUB_U_H_ENC, HSUB_U_H_DESC;
+def HSUB_U_W : HSUB_U_W_ENC, HSUB_U_W_DESC;
+def HSUB_U_D : HSUB_U_D_ENC, HSUB_U_D_DESC;
+
+def ILVEV_B : ILVEV_B_ENC, ILVEV_B_DESC;
+def ILVEV_H : ILVEV_H_ENC, ILVEV_H_DESC;
+def ILVEV_W : ILVEV_W_ENC, ILVEV_W_DESC;
+def ILVEV_D : ILVEV_D_ENC, ILVEV_D_DESC;
+
+def ILVL_B : ILVL_B_ENC, ILVL_B_DESC;
+def ILVL_H : ILVL_H_ENC, ILVL_H_DESC;
+def ILVL_W : ILVL_W_ENC, ILVL_W_DESC;
+def ILVL_D : ILVL_D_ENC, ILVL_D_DESC;
+
+def ILVOD_B : ILVOD_B_ENC, ILVOD_B_DESC;
+def ILVOD_H : ILVOD_H_ENC, ILVOD_H_DESC;
+def ILVOD_W : ILVOD_W_ENC, ILVOD_W_DESC;
+def ILVOD_D : ILVOD_D_ENC, ILVOD_D_DESC;
+
+def ILVR_B : ILVR_B_ENC, ILVR_B_DESC;
+def ILVR_H : ILVR_H_ENC, ILVR_H_DESC;
+def ILVR_W : ILVR_W_ENC, ILVR_W_DESC;
+def ILVR_D : ILVR_D_ENC, ILVR_D_DESC;
+
+def INSERT_B : INSERT_B_ENC, INSERT_B_DESC;
+def INSERT_H : INSERT_H_ENC, INSERT_H_DESC;
+def INSERT_W : INSERT_W_ENC, INSERT_W_DESC;
+
+// INSERT_FW_PSEUDO defined after INSVE_W
+// INSERT_FD_PSEUDO defined after INSVE_D
+
+def INSVE_B : INSVE_B_ENC, INSVE_B_DESC;
+def INSVE_H : INSVE_H_ENC, INSVE_H_DESC;
+def INSVE_W : INSVE_W_ENC, INSVE_W_DESC;
+def INSVE_D : INSVE_D_ENC, INSVE_D_DESC;
+
+def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC;
+def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC;
+
+def LD_B : LD_B_ENC, LD_B_DESC;
+def LD_H : LD_H_ENC, LD_H_DESC;
+def LD_W : LD_W_ENC, LD_W_DESC;
+def LD_D : LD_D_ENC, LD_D_DESC;
+
+def LDI_B : LDI_B_ENC, LDI_B_DESC;
+def LDI_H : LDI_H_ENC, LDI_H_DESC;
+def LDI_W : LDI_W_ENC, LDI_W_DESC;
+def LDI_D : LDI_D_ENC, LDI_D_DESC;
+
+def LSA : LSA_ENC, LSA_DESC;
+
+def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC;
+def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC;
+
+def MADDR_Q_H : MADDR_Q_H_ENC, MADDR_Q_H_DESC;
+def MADDR_Q_W : MADDR_Q_W_ENC, MADDR_Q_W_DESC;
+
+def MADDV_B : MADDV_B_ENC, MADDV_B_DESC;
+def MADDV_H : MADDV_H_ENC, MADDV_H_DESC;
+def MADDV_W : MADDV_W_ENC, MADDV_W_DESC;
+def MADDV_D : MADDV_D_ENC, MADDV_D_DESC;
+
+def MAX_A_B : MAX_A_B_ENC, MAX_A_B_DESC;
+def MAX_A_H : MAX_A_H_ENC, MAX_A_H_DESC;
+def MAX_A_W : MAX_A_W_ENC, MAX_A_W_DESC;
+def MAX_A_D : MAX_A_D_ENC, MAX_A_D_DESC;
+
+def MAX_S_B : MAX_S_B_ENC, MAX_S_B_DESC;
+def MAX_S_H : MAX_S_H_ENC, MAX_S_H_DESC;
+def MAX_S_W : MAX_S_W_ENC, MAX_S_W_DESC;
+def MAX_S_D : MAX_S_D_ENC, MAX_S_D_DESC;
+
+def MAX_U_B : MAX_U_B_ENC, MAX_U_B_DESC;
+def MAX_U_H : MAX_U_H_ENC, MAX_U_H_DESC;
+def MAX_U_W : MAX_U_W_ENC, MAX_U_W_DESC;
+def MAX_U_D : MAX_U_D_ENC, MAX_U_D_DESC;
+
+def MAXI_S_B : MAXI_S_B_ENC, MAXI_S_B_DESC;
+def MAXI_S_H : MAXI_S_H_ENC, MAXI_S_H_DESC;
+def MAXI_S_W : MAXI_S_W_ENC, MAXI_S_W_DESC;
+def MAXI_S_D : MAXI_S_D_ENC, MAXI_S_D_DESC;
+
+def MAXI_U_B : MAXI_U_B_ENC, MAXI_U_B_DESC;
+def MAXI_U_H : MAXI_U_H_ENC, MAXI_U_H_DESC;
+def MAXI_U_W : MAXI_U_W_ENC, MAXI_U_W_DESC;
+def MAXI_U_D : MAXI_U_D_ENC, MAXI_U_D_DESC;
+
+def MIN_A_B : MIN_A_B_ENC, MIN_A_B_DESC;
+def MIN_A_H : MIN_A_H_ENC, MIN_A_H_DESC;
+def MIN_A_W : MIN_A_W_ENC, MIN_A_W_DESC;
+def MIN_A_D : MIN_A_D_ENC, MIN_A_D_DESC;
+
+def MIN_S_B : MIN_S_B_ENC, MIN_S_B_DESC;
+def MIN_S_H : MIN_S_H_ENC, MIN_S_H_DESC;
+def MIN_S_W : MIN_S_W_ENC, MIN_S_W_DESC;
+def MIN_S_D : MIN_S_D_ENC, MIN_S_D_DESC;
+
+def MIN_U_B : MIN_U_B_ENC, MIN_U_B_DESC;
+def MIN_U_H : MIN_U_H_ENC, MIN_U_H_DESC;
+def MIN_U_W : MIN_U_W_ENC, MIN_U_W_DESC;
+def MIN_U_D : MIN_U_D_ENC, MIN_U_D_DESC;
+
+def MINI_S_B : MINI_S_B_ENC, MINI_S_B_DESC;
+def MINI_S_H : MINI_S_H_ENC, MINI_S_H_DESC;
+def MINI_S_W : MINI_S_W_ENC, MINI_S_W_DESC;
+def MINI_S_D : MINI_S_D_ENC, MINI_S_D_DESC;
+
+def MINI_U_B : MINI_U_B_ENC, MINI_U_B_DESC;
+def MINI_U_H : MINI_U_H_ENC, MINI_U_H_DESC;
+def MINI_U_W : MINI_U_W_ENC, MINI_U_W_DESC;
+def MINI_U_D : MINI_U_D_ENC, MINI_U_D_DESC;
+
+def MOD_S_B : MOD_S_B_ENC, MOD_S_B_DESC;
+def MOD_S_H : MOD_S_H_ENC, MOD_S_H_DESC;
+def MOD_S_W : MOD_S_W_ENC, MOD_S_W_DESC;
+def MOD_S_D : MOD_S_D_ENC, MOD_S_D_DESC;
+
+def MOD_U_B : MOD_U_B_ENC, MOD_U_B_DESC;
+def MOD_U_H : MOD_U_H_ENC, MOD_U_H_DESC;
+def MOD_U_W : MOD_U_W_ENC, MOD_U_W_DESC;
+def MOD_U_D : MOD_U_D_ENC, MOD_U_D_DESC;
+
+def MOVE_V : MOVE_V_ENC, MOVE_V_DESC;
+
+def MSUB_Q_H : MSUB_Q_H_ENC, MSUB_Q_H_DESC;
+def MSUB_Q_W : MSUB_Q_W_ENC, MSUB_Q_W_DESC;
+
+def MSUBR_Q_H : MSUBR_Q_H_ENC, MSUBR_Q_H_DESC;
+def MSUBR_Q_W : MSUBR_Q_W_ENC, MSUBR_Q_W_DESC;
+
+def MSUBV_B : MSUBV_B_ENC, MSUBV_B_DESC;
+def MSUBV_H : MSUBV_H_ENC, MSUBV_H_DESC;
+def MSUBV_W : MSUBV_W_ENC, MSUBV_W_DESC;
+def MSUBV_D : MSUBV_D_ENC, MSUBV_D_DESC;
+
+def MUL_Q_H : MUL_Q_H_ENC, MUL_Q_H_DESC;
+def MUL_Q_W : MUL_Q_W_ENC, MUL_Q_W_DESC;
+
+def MULR_Q_H : MULR_Q_H_ENC, MULR_Q_H_DESC;
+def MULR_Q_W : MULR_Q_W_ENC, MULR_Q_W_DESC;
+
+def MULV_B : MULV_B_ENC, MULV_B_DESC;
+def MULV_H : MULV_H_ENC, MULV_H_DESC;
+def MULV_W : MULV_W_ENC, MULV_W_DESC;
+def MULV_D : MULV_D_ENC, MULV_D_DESC;
+
+def NLOC_B : NLOC_B_ENC, NLOC_B_DESC;
+def NLOC_H : NLOC_H_ENC, NLOC_H_DESC;
+def NLOC_W : NLOC_W_ENC, NLOC_W_DESC;
+def NLOC_D : NLOC_D_ENC, NLOC_D_DESC;
+
+def NLZC_B : NLZC_B_ENC, NLZC_B_DESC;
+def NLZC_H : NLZC_H_ENC, NLZC_H_DESC;
+def NLZC_W : NLZC_W_ENC, NLZC_W_DESC;
+def NLZC_D : NLZC_D_ENC, NLZC_D_DESC;
+
+def NOR_V : NOR_V_ENC, NOR_V_DESC;
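+// The _H, _W and _D pseudos below (and the matching OR_V/XOR_V ones) exist
+// only so that patterns for wider element types can reuse the byte-wide
+// encoding; each expands to the plain .V instruction on MSA128B operands.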
+def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC,
+ PseudoInstExpansion<(NOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC,
+ PseudoInstExpansion<(NOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC,
+ PseudoInstExpansion<(NOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+
+def NORI_B : NORI_B_ENC, NORI_B_DESC;
+
+def OR_V : OR_V_ENC, OR_V_DESC;
+def OR_V_H_PSEUDO : OR_V_H_PSEUDO_DESC,
+ PseudoInstExpansion<(OR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def OR_V_W_PSEUDO : OR_V_W_PSEUDO_DESC,
+ PseudoInstExpansion<(OR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def OR_V_D_PSEUDO : OR_V_D_PSEUDO_DESC,
+ PseudoInstExpansion<(OR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+
+def ORI_B : ORI_B_ENC, ORI_B_DESC;
+
+def PCKEV_B : PCKEV_B_ENC, PCKEV_B_DESC;
+def PCKEV_H : PCKEV_H_ENC, PCKEV_H_DESC;
+def PCKEV_W : PCKEV_W_ENC, PCKEV_W_DESC;
+def PCKEV_D : PCKEV_D_ENC, PCKEV_D_DESC;
+
+def PCKOD_B : PCKOD_B_ENC, PCKOD_B_DESC;
+def PCKOD_H : PCKOD_H_ENC, PCKOD_H_DESC;
+def PCKOD_W : PCKOD_W_ENC, PCKOD_W_DESC;
+def PCKOD_D : PCKOD_D_ENC, PCKOD_D_DESC;
+
+def PCNT_B : PCNT_B_ENC, PCNT_B_DESC;
+def PCNT_H : PCNT_H_ENC, PCNT_H_DESC;
+def PCNT_W : PCNT_W_ENC, PCNT_W_DESC;
+def PCNT_D : PCNT_D_ENC, PCNT_D_DESC;
+
+def SAT_S_B : SAT_S_B_ENC, SAT_S_B_DESC;
+def SAT_S_H : SAT_S_H_ENC, SAT_S_H_DESC;
+def SAT_S_W : SAT_S_W_ENC, SAT_S_W_DESC;
+def SAT_S_D : SAT_S_D_ENC, SAT_S_D_DESC;
+
+def SAT_U_B : SAT_U_B_ENC, SAT_U_B_DESC;
+def SAT_U_H : SAT_U_H_ENC, SAT_U_H_DESC;
+def SAT_U_W : SAT_U_W_ENC, SAT_U_W_DESC;
+def SAT_U_D : SAT_U_D_ENC, SAT_U_D_DESC;
+
+def SHF_B : SHF_B_ENC, SHF_B_DESC;
+def SHF_H : SHF_H_ENC, SHF_H_DESC;
+def SHF_W : SHF_W_ENC, SHF_W_DESC;
+
+def SLD_B : SLD_B_ENC, SLD_B_DESC;
+def SLD_H : SLD_H_ENC, SLD_H_DESC;
+def SLD_W : SLD_W_ENC, SLD_W_DESC;
+def SLD_D : SLD_D_ENC, SLD_D_DESC;
+
+def SLDI_B : SLDI_B_ENC, SLDI_B_DESC;
+def SLDI_H : SLDI_H_ENC, SLDI_H_DESC;
+def SLDI_W : SLDI_W_ENC, SLDI_W_DESC;
+def SLDI_D : SLDI_D_ENC, SLDI_D_DESC;
+
+def SLL_B : SLL_B_ENC, SLL_B_DESC;
+def SLL_H : SLL_H_ENC, SLL_H_DESC;
+def SLL_W : SLL_W_ENC, SLL_W_DESC;
+def SLL_D : SLL_D_ENC, SLL_D_DESC;
+
+def SLLI_B : SLLI_B_ENC, SLLI_B_DESC;
+def SLLI_H : SLLI_H_ENC, SLLI_H_DESC;
+def SLLI_W : SLLI_W_ENC, SLLI_W_DESC;
+def SLLI_D : SLLI_D_ENC, SLLI_D_DESC;
+
+def SPLAT_B : SPLAT_B_ENC, SPLAT_B_DESC;
+def SPLAT_H : SPLAT_H_ENC, SPLAT_H_DESC;
+def SPLAT_W : SPLAT_W_ENC, SPLAT_W_DESC;
+def SPLAT_D : SPLAT_D_ENC, SPLAT_D_DESC;
+
+def SPLATI_B : SPLATI_B_ENC, SPLATI_B_DESC;
+def SPLATI_H : SPLATI_H_ENC, SPLATI_H_DESC;
+def SPLATI_W : SPLATI_W_ENC, SPLATI_W_DESC;
+def SPLATI_D : SPLATI_D_ENC, SPLATI_D_DESC;
+
+def SRA_B : SRA_B_ENC, SRA_B_DESC;
+def SRA_H : SRA_H_ENC, SRA_H_DESC;
+def SRA_W : SRA_W_ENC, SRA_W_DESC;
+def SRA_D : SRA_D_ENC, SRA_D_DESC;
+
+def SRAI_B : SRAI_B_ENC, SRAI_B_DESC;
+def SRAI_H : SRAI_H_ENC, SRAI_H_DESC;
+def SRAI_W : SRAI_W_ENC, SRAI_W_DESC;
+def SRAI_D : SRAI_D_ENC, SRAI_D_DESC;
+
+def SRAR_B : SRAR_B_ENC, SRAR_B_DESC;
+def SRAR_H : SRAR_H_ENC, SRAR_H_DESC;
+def SRAR_W : SRAR_W_ENC, SRAR_W_DESC;
+def SRAR_D : SRAR_D_ENC, SRAR_D_DESC;
+
+def SRARI_B : SRARI_B_ENC, SRARI_B_DESC;
+def SRARI_H : SRARI_H_ENC, SRARI_H_DESC;
+def SRARI_W : SRARI_W_ENC, SRARI_W_DESC;
+def SRARI_D : SRARI_D_ENC, SRARI_D_DESC;
+
+def SRL_B : SRL_B_ENC, SRL_B_DESC;
+def SRL_H : SRL_H_ENC, SRL_H_DESC;
+def SRL_W : SRL_W_ENC, SRL_W_DESC;
+def SRL_D : SRL_D_ENC, SRL_D_DESC;
+
+def SRLI_B : SRLI_B_ENC, SRLI_B_DESC;
+def SRLI_H : SRLI_H_ENC, SRLI_H_DESC;
+def SRLI_W : SRLI_W_ENC, SRLI_W_DESC;
+def SRLI_D : SRLI_D_ENC, SRLI_D_DESC;
+
+def SRLR_B : SRLR_B_ENC, SRLR_B_DESC;
+def SRLR_H : SRLR_H_ENC, SRLR_H_DESC;
+def SRLR_W : SRLR_W_ENC, SRLR_W_DESC;
+def SRLR_D : SRLR_D_ENC, SRLR_D_DESC;
+
+def SRLRI_B : SRLRI_B_ENC, SRLRI_B_DESC;
+def SRLRI_H : SRLRI_H_ENC, SRLRI_H_DESC;
+def SRLRI_W : SRLRI_W_ENC, SRLRI_W_DESC;
+def SRLRI_D : SRLRI_D_ENC, SRLRI_D_DESC;
+
+def ST_B : ST_B_ENC, ST_B_DESC;
+def ST_H : ST_H_ENC, ST_H_DESC;
+def ST_W : ST_W_ENC, ST_W_DESC;
+def ST_D : ST_D_ENC, ST_D_DESC;
+
+def SUBS_S_B : SUBS_S_B_ENC, SUBS_S_B_DESC;
+def SUBS_S_H : SUBS_S_H_ENC, SUBS_S_H_DESC;
+def SUBS_S_W : SUBS_S_W_ENC, SUBS_S_W_DESC;
+def SUBS_S_D : SUBS_S_D_ENC, SUBS_S_D_DESC;
+
+def SUBS_U_B : SUBS_U_B_ENC, SUBS_U_B_DESC;
+def SUBS_U_H : SUBS_U_H_ENC, SUBS_U_H_DESC;
+def SUBS_U_W : SUBS_U_W_ENC, SUBS_U_W_DESC;
+def SUBS_U_D : SUBS_U_D_ENC, SUBS_U_D_DESC;
+
+def SUBSUS_U_B : SUBSUS_U_B_ENC, SUBSUS_U_B_DESC;
+def SUBSUS_U_H : SUBSUS_U_H_ENC, SUBSUS_U_H_DESC;
+def SUBSUS_U_W : SUBSUS_U_W_ENC, SUBSUS_U_W_DESC;
+def SUBSUS_U_D : SUBSUS_U_D_ENC, SUBSUS_U_D_DESC;
+
+def SUBSUU_S_B : SUBSUU_S_B_ENC, SUBSUU_S_B_DESC;
+def SUBSUU_S_H : SUBSUU_S_H_ENC, SUBSUU_S_H_DESC;
+def SUBSUU_S_W : SUBSUU_S_W_ENC, SUBSUU_S_W_DESC;
+def SUBSUU_S_D : SUBSUU_S_D_ENC, SUBSUU_S_D_DESC;
+
+def SUBV_B : SUBV_B_ENC, SUBV_B_DESC;
+def SUBV_H : SUBV_H_ENC, SUBV_H_DESC;
+def SUBV_W : SUBV_W_ENC, SUBV_W_DESC;
+def SUBV_D : SUBV_D_ENC, SUBV_D_DESC;
+
+def SUBVI_B : SUBVI_B_ENC, SUBVI_B_DESC;
+def SUBVI_H : SUBVI_H_ENC, SUBVI_H_DESC;
+def SUBVI_W : SUBVI_W_ENC, SUBVI_W_DESC;
+def SUBVI_D : SUBVI_D_ENC, SUBVI_D_DESC;
+
+def VSHF_B : VSHF_B_ENC, VSHF_B_DESC;
+def VSHF_H : VSHF_H_ENC, VSHF_H_DESC;
+def VSHF_W : VSHF_W_ENC, VSHF_W_DESC;
+def VSHF_D : VSHF_D_ENC, VSHF_D_DESC;
+
+def XOR_V : XOR_V_ENC, XOR_V_DESC;
+def XOR_V_H_PSEUDO : XOR_V_H_PSEUDO_DESC,
+ PseudoInstExpansion<(XOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def XOR_V_W_PSEUDO : XOR_V_W_PSEUDO_DESC,
+ PseudoInstExpansion<(XOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def XOR_V_D_PSEUDO : XOR_V_D_PSEUDO_DESC,
+ PseudoInstExpansion<(XOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+
+def XORI_B : XORI_B_ENC, XORI_B_DESC;
+
+// Patterns.
+class MSAPat<dag pattern, dag result, list<Predicate> pred = [HasMSA]> :
+ Pat<pattern, result>, Requires<pred>;
+
+def : MSAPat<(extractelt (v4i32 MSA128W:$ws), immZExt4:$idx),
+ (COPY_S_W MSA128W:$ws, immZExt4:$idx)>;
+
+def : MSAPat<(v16i8 (load addr:$addr)), (LD_B addr:$addr)>;
+def : MSAPat<(v8i16 (load addr:$addr)), (LD_H addr:$addr)>;
+def : MSAPat<(v4i32 (load addr:$addr)), (LD_W addr:$addr)>;
+def : MSAPat<(v2i64 (load addr:$addr)), (LD_D addr:$addr)>;
+def : MSAPat<(v8f16 (load addr:$addr)), (LD_H addr:$addr)>;
+def : MSAPat<(v4f32 (load addr:$addr)), (LD_W addr:$addr)>;
+def : MSAPat<(v2f64 (load addr:$addr)), (LD_D addr:$addr)>;
+
+def : MSAPat<(v8f16 (load addrRegImm:$addr)), (LD_H addrRegImm:$addr)>;
+def : MSAPat<(v4f32 (load addrRegImm:$addr)), (LD_W addrRegImm:$addr)>;
+def : MSAPat<(v2f64 (load addrRegImm:$addr)), (LD_D addrRegImm:$addr)>;
+
+def : MSAPat<(store (v16i8 MSA128B:$ws), addr:$addr),
+ (ST_B MSA128B:$ws, addr:$addr)>;
+def : MSAPat<(store (v8i16 MSA128H:$ws), addr:$addr),
+ (ST_H MSA128H:$ws, addr:$addr)>;
+def : MSAPat<(store (v4i32 MSA128W:$ws), addr:$addr),
+ (ST_W MSA128W:$ws, addr:$addr)>;
+def : MSAPat<(store (v2i64 MSA128D:$ws), addr:$addr),
+ (ST_D MSA128D:$ws, addr:$addr)>;
+def : MSAPat<(store (v8f16 MSA128H:$ws), addr:$addr),
+ (ST_H MSA128H:$ws, addr:$addr)>;
+def : MSAPat<(store (v4f32 MSA128W:$ws), addr:$addr),
+ (ST_W MSA128W:$ws, addr:$addr)>;
+def : MSAPat<(store (v2f64 MSA128D:$ws), addr:$addr),
+ (ST_D MSA128D:$ws, addr:$addr)>;
+
+def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addrRegImm:$addr),
+ (ST_H MSA128H:$ws, addrRegImm:$addr)>;
+def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addrRegImm:$addr),
+ (ST_W MSA128W:$ws, addrRegImm:$addr)>;
+def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrRegImm:$addr),
+ (ST_D MSA128D:$ws, addrRegImm:$addr)>;
+
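+// The fabs pseudos below expand to FMAX_A.df $wd, $ws, $ws, since the
+// maximum of |x| and |x| is |x|.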
+class MSA_FABS_PSEUDO_DESC_BASE<RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MipsPseudo<(outs ROWD:$wd),
+ (ins ROWS:$ws),
+ [(set ROWD:$wd, (fabs ROWS:$ws))]> {
+ InstrItinClass Itinerary = itin;
+}
+def FABS_W : MSA_FABS_PSEUDO_DESC_BASE<MSA128WOpnd>,
+ PseudoInstExpansion<(FMAX_A_W MSA128WOpnd:$wd, MSA128WOpnd:$ws,
+ MSA128WOpnd:$ws)>;
+def FABS_D : MSA_FABS_PSEUDO_DESC_BASE<MSA128DOpnd>,
+ PseudoInstExpansion<(FMAX_A_D MSA128DOpnd:$wd, MSA128DOpnd:$ws,
+ MSA128DOpnd:$ws)>;
+
+class MSABitconvertPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC, list<Predicate> preds = [HasMSA]> :
+ MSAPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>;
+
+// These are endian-independent because the element size doesn't change.
+def : MSABitconvertPat<v8i16, v8f16, MSA128H>;
+def : MSABitconvertPat<v4i32, v4f32, MSA128W>;
+def : MSABitconvertPat<v2i64, v2f64, MSA128D>;
+def : MSABitconvertPat<v8f16, v8i16, MSA128H>;
+def : MSABitconvertPat<v4f32, v4i32, MSA128W>;
+def : MSABitconvertPat<v2f64, v2i64, MSA128D>;
+
+// Little endian bitcasts are always no-ops
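+// (on a little-endian target the byte layout of a vector does not depend on
+// its element size, so reinterpreting the element type needs no data movement)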
+def : MSABitconvertPat<v16i8, v8i16, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v4i32, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v2i64, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v8f16, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v4f32, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v2f64, MSA128B, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v8i16, v16i8, MSA128H, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v8i16, v4i32, MSA128H, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v8i16, v2i64, MSA128H, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v8i16, v4f32, MSA128H, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v8i16, v2f64, MSA128H, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v4i32, v16i8, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4i32, v8i16, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4i32, v2i64, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4i32, v8f16, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4i32, v2f64, MSA128W, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v2i64, v16i8, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2i64, v8i16, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2i64, v4i32, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2i64, v8f16, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2i64, v4f32, MSA128D, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v4f32, v16i8, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4f32, v8i16, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4f32, v2i64, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4f32, v8f16, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4f32, v2f64, MSA128W, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v2f64, v16i8, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2f64, v8i16, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2f64, v4i32, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2f64, v8f16, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2f64, v4f32, MSA128D, [HasMSA, IsLE]>;
+
+// Big endian bitcasts expand to shuffle instructions.
+// This is because bitcast is defined to be a store/load sequence and the
+// vector store/load instructions are mixed-endian with respect to the vector
+// as a whole (little endian with respect to element order, but big endian
+// within each element).
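+// The SHF immediates used below encode one 2-bit source index per destination
+// element: 177 (0b10110001) swaps the elements of each adjacent pair, while 27
+// (0b00011011) reverses each group of four elements, so SHF_B 27 followed by
+// SHF_W 177 reverses all eight bytes of a doubleword.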
+
+class MSABitconvertReverseQuartersPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC, MSAInst Insn,
+ RegisterClass ViaRC> :
+ MSAPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS (Insn (COPY_TO_REGCLASS SrcVT:$src, ViaRC), 27),
+ DstRC),
+ [HasMSA, IsBE]>;
+
+class MSABitconvertReverseHalvesPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC, MSAInst Insn,
+ RegisterClass ViaRC> :
+ MSAPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS (Insn (COPY_TO_REGCLASS SrcVT:$src, ViaRC), 177),
+ DstRC),
+ [HasMSA, IsBE]>;
+
+class MSABitconvertReverseBInHPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseHalvesPat<DstVT, SrcVT, DstRC, SHF_B, MSA128B>;
+
+class MSABitconvertReverseBInWPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseQuartersPat<DstVT, SrcVT, DstRC, SHF_B, MSA128B>;
+
+class MSABitconvertReverseBInDPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSAPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS
+ (SHF_W
+ (COPY_TO_REGCLASS
+ (SHF_B (COPY_TO_REGCLASS SrcVT:$src, MSA128B), 27),
+ MSA128W), 177),
+ DstRC),
+ [HasMSA, IsBE]>;
+
+class MSABitconvertReverseHInWPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseHalvesPat<DstVT, SrcVT, DstRC, SHF_H, MSA128H>;
+
+class MSABitconvertReverseHInDPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseQuartersPat<DstVT, SrcVT, DstRC, SHF_H, MSA128H>;
+
+class MSABitconvertReverseWInDPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseHalvesPat<DstVT, SrcVT, DstRC, SHF_W, MSA128W>;
+
+def : MSABitconvertReverseBInHPat<v8i16, v16i8, MSA128H>;
+def : MSABitconvertReverseBInHPat<v8f16, v16i8, MSA128H>;
+def : MSABitconvertReverseBInWPat<v4i32, v16i8, MSA128W>;
+def : MSABitconvertReverseBInWPat<v4f32, v16i8, MSA128W>;
+def : MSABitconvertReverseBInDPat<v2i64, v16i8, MSA128D>;
+def : MSABitconvertReverseBInDPat<v2f64, v16i8, MSA128D>;
+
+def : MSABitconvertReverseBInHPat<v16i8, v8i16, MSA128B>;
+def : MSABitconvertReverseHInWPat<v4i32, v8i16, MSA128W>;
+def : MSABitconvertReverseHInWPat<v4f32, v8i16, MSA128W>;
+def : MSABitconvertReverseHInDPat<v2i64, v8i16, MSA128D>;
+def : MSABitconvertReverseHInDPat<v2f64, v8i16, MSA128D>;
+
+def : MSABitconvertReverseBInHPat<v16i8, v8f16, MSA128B>;
+def : MSABitconvertReverseHInWPat<v4i32, v8f16, MSA128W>;
+def : MSABitconvertReverseHInWPat<v4f32, v8f16, MSA128W>;
+def : MSABitconvertReverseHInDPat<v2i64, v8f16, MSA128D>;
+def : MSABitconvertReverseHInDPat<v2f64, v8f16, MSA128D>;
+
+def : MSABitconvertReverseBInWPat<v16i8, v4i32, MSA128B>;
+def : MSABitconvertReverseHInWPat<v8i16, v4i32, MSA128H>;
+def : MSABitconvertReverseHInWPat<v8f16, v4i32, MSA128H>;
+def : MSABitconvertReverseWInDPat<v2i64, v4i32, MSA128D>;
+def : MSABitconvertReverseWInDPat<v2f64, v4i32, MSA128D>;
+
+def : MSABitconvertReverseBInWPat<v16i8, v4f32, MSA128B>;
+def : MSABitconvertReverseHInWPat<v8i16, v4f32, MSA128H>;
+def : MSABitconvertReverseHInWPat<v8f16, v4f32, MSA128H>;
+def : MSABitconvertReverseWInDPat<v2i64, v4f32, MSA128D>;
+def : MSABitconvertReverseWInDPat<v2f64, v4f32, MSA128D>;
+
+def : MSABitconvertReverseBInDPat<v16i8, v2i64, MSA128B>;
+def : MSABitconvertReverseHInDPat<v8i16, v2i64, MSA128H>;
+def : MSABitconvertReverseHInDPat<v8f16, v2i64, MSA128H>;
+def : MSABitconvertReverseWInDPat<v4i32, v2i64, MSA128W>;
+def : MSABitconvertReverseWInDPat<v4f32, v2i64, MSA128W>;
+
+def : MSABitconvertReverseBInDPat<v16i8, v2f64, MSA128B>;
+def : MSABitconvertReverseHInDPat<v8i16, v2f64, MSA128H>;
+def : MSABitconvertReverseHInDPat<v8f16, v2f64, MSA128H>;
+def : MSABitconvertReverseWInDPat<v4i32, v2f64, MSA128W>;
+def : MSABitconvertReverseWInDPat<v4f32, v2f64, MSA128W>;
+
+// Pseudos used to implement BNZ.df and BZ.df.
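+// Each pseudo tests an MSA register and yields 0 or 1 in a GPR32 result; a
+// custom inserter later expands it into the corresponding BNZ.df/BZ.df branch
+// selecting between those two constants.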
+
+class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
+ RegisterClass RCWS,
+ InstrItinClass itin = NoItinerary> :
+ MipsPseudo<(outs GPR32:$dst),
+ (ins RCWS:$ws),
+ [(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> {
+ bit usesCustomInserter = 1;
+}
+
+def SNZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8,
+ MSA128B, NoItinerary>;
+def SNZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v8i16,
+ MSA128H, NoItinerary>;
+def SNZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v4i32,
+ MSA128W, NoItinerary>;
+def SNZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v2i64,
+ MSA128D, NoItinerary>;
+def SNZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyNonZero, v16i8,
+ MSA128B, NoItinerary>;
+
+def SZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v16i8,
+ MSA128B, NoItinerary>;
+def SZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v8i16,
+ MSA128H, NoItinerary>;
+def SZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v4i32,
+ MSA128W, NoItinerary>;
+def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
+ MSA128D, NoItinerary>;
+def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
+ MSA128B, NoItinerary>;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index a7299d7..dedf802 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -22,6 +23,53 @@ static cl::opt<bool>
FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
+// class MipsCallEntry.
+MipsCallEntry::MipsCallEntry(const StringRef &N) {
+#ifndef NDEBUG
+ Name = N;
+ Val = 0;
+#endif
+}
+
+MipsCallEntry::MipsCallEntry(const GlobalValue *V) {
+#ifndef NDEBUG
+ Val = V;
+#endif
+}
+
+bool MipsCallEntry::isConstant(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool MipsCallEntry::isAliased(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool MipsCallEntry::mayAlias(const MachineFrameInfo *) const {
+ return false;
+}
+
+void MipsCallEntry::printCustom(raw_ostream &O) const {
+ O << "MipsCallEntry: ";
+#ifndef NDEBUG
+ if (Val)
+ O << Val->getName();
+ else
+ O << Name;
+#endif
+}
+
+MipsFunctionInfo::~MipsFunctionInfo() {
+ for (StringMap<const MipsCallEntry *>::iterator
+ I = ExternalCallEntries.begin(), E = ExternalCallEntries.end(); I != E;
+ ++I)
+ delete I->getValue();
+
+ for (ValueMap<const GlobalValue *, const MipsCallEntry *>::iterator
+ I = GlobalCallEntries.begin(), E = GlobalCallEntries.end(); I != E; ++I)
+ delete I->second;
+}
+
bool MipsFunctionInfo::globalBaseRegSet() const {
return GlobalBaseReg;
}
@@ -72,4 +120,22 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
|| FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
}
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const StringRef &Name) {
+ const MipsCallEntry *&E = ExternalCallEntries[Name];
+
+ if (!E)
+ E = new MipsCallEntry(Name);
+
+ return MachinePointerInfo(E);
+}
+
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
+ const MipsCallEntry *&E = GlobalCallEntries[Val];
+
+ if (!E)
+ E = new MipsCallEntry(Val);
+
+ return MachinePointerInfo(E);
+}
+
void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index b05b348..43bf682 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,56 +15,48 @@
#define MIPS_MACHINE_FUNCTION_INFO_H
#include "MipsSubtarget.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include <utility>
namespace llvm {
+/// \brief A class derived from PseudoSourceValue that represents a GOT entry
+/// resolved by lazy-binding.
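+/// Instances are created by MipsFunctionInfo::callPtrInfo and used as the
+/// pointer value of the MachinePointerInfo attached to the GOT loads of call
+/// targets, letting the backend distinguish those loads from accesses to
+/// ordinary IR values (see the overridden queries below).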
+class MipsCallEntry : public PseudoSourceValue {
+public:
+ explicit MipsCallEntry(const StringRef &N);
+ explicit MipsCallEntry(const GlobalValue *V);
+ virtual bool isConstant(const MachineFrameInfo *) const;
+ virtual bool isAliased(const MachineFrameInfo *) const;
+ virtual bool mayAlias(const MachineFrameInfo *) const;
+
+private:
+ virtual void printCustom(raw_ostream &O) const;
+#ifndef NDEBUG
+ std::string Name;
+ const GlobalValue *Val;
+#endif
+};
+
/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
-
- MachineFunction& MF;
- /// SRetReturnReg - Some subtargets require that sret lowering includes
- /// returning the value of the returned struct in a register. This field
- /// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
-
- /// GlobalBaseReg - keeps track of the virtual register initialized for
- /// use as the global base register. This is used for PIC in some PIC
- /// relocation models.
- unsigned GlobalBaseReg;
-
- /// Mips16SPAliasReg - keeps track of the virtual register initialized for
- /// use as an alias for SP for use in load/store of halfword/byte from/to
- /// the stack
- unsigned Mips16SPAliasReg;
-
- /// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
-
- /// True if function has a byval argument.
- bool HasByvalArg;
-
- /// Size of incoming argument area.
- unsigned IncomingArgSize;
-
- /// CallsEhReturn - Whether the function calls llvm.eh.return.
- bool CallsEhReturn;
-
- /// Frame objects for spilling eh data registers.
- int EhDataRegFI[4];
-
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
VarArgsFrameIndex(0), CallsEhReturn(false)
{}
+ ~MipsFunctionInfo();
+
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@@ -92,6 +84,51 @@ public:
int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
bool isEhDataRegFI(int FI) const;
+ /// \brief Create a MachinePointerInfo that has a MipsCallEntry object
+ /// representing a GOT entry for an external function.
+ MachinePointerInfo callPtrInfo(const StringRef &Name);
+
+ /// \brief Create a MachinePointerInfo that has a MipsCallEntry object
+ /// representing a GOT entry for a global function.
+ MachinePointerInfo callPtrInfo(const GlobalValue *Val);
+
+private:
+ virtual void anchor();
+
+ MachineFunction& MF;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// Mips16SPAliasReg - keeps track of the virtual register initialized for
+ /// use as an alias for SP for use in load/store of halfword/byte from/to
+ /// the stack
+ unsigned Mips16SPAliasReg;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// True if function has a byval argument.
+ bool HasByvalArg;
+
+ /// Size of incoming argument area.
+ unsigned IncomingArgSize;
+
+ /// CallsEhReturn - Whether the function calls llvm.eh.return.
+ bool CallsEhReturn;
+
+ /// Frame objects for spilling eh data registers.
+ int EhDataRegFI[4];
+
+ /// MipsCallEntry maps.
+ StringMap<const MipsCallEntry *> ExternalCallEntries;
+ ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries;
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 1919077..fe60841 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -14,9 +14,17 @@
#define DEBUG_TYPE "mips-os16"
#include "MipsOs16.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
+static cl::opt<std::string> Mips32FunctionMask(
+ "mips32-function-mask",
+ cl::init(""),
+ cl::desc("Force function to be mips32"),
+ cl::Hidden);
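+// The mask characters are applied to the module's function definitions in
+// order: '1' forces a function to be compiled as mips32 (nomips16), '.' stops
+// applying the mask, any other character leaves the function alone, and the
+// mask wraps around if it is shorter than the number of functions.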
+
namespace {
// Figure out if we need floating point based on the function signature.
@@ -85,18 +93,43 @@ namespace llvm {
bool MipsOs16::runOnModule(Module &M) {
- DEBUG(errs() << "Run on Module MipsOs16\n");
+ bool usingMask = Mips32FunctionMask.length() > 0;
+ bool doneUsingMask = false; // stop applying the mask once a '.' is seen
+ DEBUG(dbgs() << "Run on Module MipsOs16 \n" << Mips32FunctionMask << "\n");
+ if (usingMask)
+ DEBUG(dbgs() << "using mask \n" << Mips32FunctionMask << "\n");
+ unsigned int functionIndex = 0;
bool modified = false;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- if (needsFP(*F)) {
- DEBUG(dbgs() << " need to compile as nomips16 \n");
- F->addFnAttr("nomips16");
+ if (usingMask) {
+ if (!doneUsingMask) {
+ if (functionIndex == Mips32FunctionMask.length())
+ functionIndex = 0;
+ switch (Mips32FunctionMask[functionIndex]) {
+ case '1':
+ DEBUG(dbgs() << "mask forced mips32: " << F->getName() << "\n");
+ F->addFnAttr("nomips16");
+ break;
+ case '.':
+ doneUsingMask = true;
+ break;
+ default:
+ break;
+ }
+ functionIndex++;
+ }
}
else {
- F->addFnAttr("mips16");
- DEBUG(dbgs() << " no need to compile as nomips16 \n");
+ if (needsFP(*F)) {
+ DEBUG(dbgs() << "os16 forced mips32: " << F->getName() << "\n");
+ F->addFnAttr("nomips16");
+ }
+ else {
+ DEBUG(dbgs() << "os16 forced mips16: " << F->getName() << "\n");
+ F->addFnAttr("mips16");
+ }
}
}
return modified;
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 0b5fc33..3105b02 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -47,6 +47,11 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+const TargetRegisterClass *
+MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ return Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+}
unsigned
MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
@@ -56,7 +61,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return 0;
case Mips::GPR32RegClassID:
case Mips::GPR64RegClassID:
- case Mips::DSPRegsRegClassID: {
+ case Mips::DSPRRegClassID: {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return 28 - TFI->hasFP(MF);
}
@@ -78,26 +83,34 @@ const uint16_t* MipsRegisterInfo::
getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
- else if (!Subtarget.hasMips64())
- return CSR_O32_SaveList;
- else if (Subtarget.isABI_N32())
+
+ if (Subtarget.isABI_N64())
+ return CSR_N64_SaveList;
+
+ if (Subtarget.isABI_N32())
return CSR_N32_SaveList;
- assert(Subtarget.isABI_N64());
- return CSR_N64_SaveList;
+ if (Subtarget.isFP64bit())
+ return CSR_O32_FP64_SaveList;
+
+ return CSR_O32_SaveList;
}
const uint32_t*
MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_RegMask;
- else if (!Subtarget.hasMips64())
- return CSR_O32_RegMask;
- else if (Subtarget.isABI_N32())
+
+ if (Subtarget.isABI_N64())
+ return CSR_N64_RegMask;
+
+ if (Subtarget.isABI_N32())
return CSR_N32_RegMask;
- assert(Subtarget.isABI_N64());
- return CSR_N64_RegMask;
+ if (Subtarget.isFP64bit())
+ return CSR_O32_FP64_RegMask;
+
+ return CSR_O32_RegMask;
}
const uint32_t *MipsRegisterInfo::getMips16RetHelperMask() {
@@ -123,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I)
Reserved.set(ReservedGPR64[I]);
- if (Subtarget.hasMips64()) {
+ if (Subtarget.isFP64bit()) {
// Reserve all registers in AFGR64.
for (RegIter Reg = Mips::AFGR64RegClass.begin(),
EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg)
@@ -146,7 +159,6 @@ getReservedRegs(const MachineFunction &MF) const {
// Reserve hardware registers.
Reserved.set(Mips::HWR29);
- Reserved.set(Mips::HWR29_64);
// Reserve DSP control register.
Reserved.set(Mips::DSPPos);
@@ -155,6 +167,16 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::DSPEFI);
Reserved.set(Mips::DSPOutFlag);
+ // Reserve MSA control registers.
+ Reserved.set(Mips::MSAIR);
+ Reserved.set(Mips::MSACSR);
+ Reserved.set(Mips::MSAAccess);
+ Reserved.set(Mips::MSASave);
+ Reserved.set(Mips::MSAModify);
+ Reserved.set(Mips::MSARequest);
+ Reserved.set(Mips::MSAMap);
+ Reserved.set(Mips::MSAUnmap);
+
// Reserve RA if in mips16 mode.
if (Subtarget.inMips16Mode()) {
Reserved.set(Mips::RA);
@@ -218,12 +240,3 @@ getFrameRegister(const MachineFunction &MF) const {
}
-unsigned MipsRegisterInfo::
-getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned MipsRegisterInfo::
-getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 20ba41d..0450c6f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,6 +42,9 @@ public:
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
+ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const;
+
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
@@ -65,10 +68,6 @@ public:
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
- /// Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
/// \brief Return GPR register class.
virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index c72c30d..3173d09 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -11,9 +11,8 @@
// Declarations that describe the MIPS register file
//===----------------------------------------------------------------------===//
let Namespace = "Mips" in {
-def sub_fpeven : SubRegIndex<32>;
-def sub_fpodd : SubRegIndex<32, 32>;
def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
def sub_lo : SubRegIndex<32>;
def sub_hi : SubRegIndex<32, 32>;
def sub_dsp16_19 : SubRegIndex<4, 16>;
@@ -54,17 +53,24 @@ class FPR<bits<16> Enc, string n> : MipsReg<Enc, n>;
// Mips 64-bit (aliased) FPU Registers
class AFPR<bits<16> Enc, string n, list<Register> subregs>
: MipsRegWithSubRegs<Enc, n, subregs> {
- let SubRegIndices = [sub_fpeven, sub_fpodd];
+ let SubRegIndices = [sub_lo, sub_hi];
let CoveredBySubRegs = 1;
}
class AFPR64<bits<16> Enc, string n, list<Register> subregs>
: MipsRegWithSubRegs<Enc, n, subregs> {
- let SubRegIndices = [sub_32];
+ let SubRegIndices = [sub_lo, sub_hi];
+ let CoveredBySubRegs = 1;
+}
+
+// Mips 128-bit (aliased) MSA Registers
+class AFPR128<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_64];
}
// Accumulator Registers
-class ACC<bits<16> Enc, string n, list<Register> subregs>
+class ACCReg<bits<16> Enc, string n, list<Register> subregs>
: MipsRegWithSubRegs<Enc, n, subregs> {
let SubRegIndices = [sub_lo, sub_hi];
let CoveredBySubRegs = 1;
@@ -150,6 +156,10 @@ let Namespace = "Mips" in {
foreach I = 0-31 in
def F#I : FPR<I, "f"#I>, DwarfRegNum<[!add(I, 32)]>;
+ // Higher half of 64-bit FP registers.
+ foreach I = 0-31 in
+ def F_HI#I : FPR<I, "f"#I>, DwarfRegNum<[!add(I, 32)]>;
+
/// Mips double-precision FPU Registers (aliased
/// with the single-precision registers to hold 64-bit values)
foreach I = 0-15 in
@@ -159,22 +169,28 @@ let Namespace = "Mips" in {
/// Mips double-precision FPU Registers in MFP64 mode.
foreach I = 0-31 in
- def D#I#_64 : AFPR64<I, "f"#I, [!cast<FPR>("F"#I)]>,
+ def D#I#_64 : AFPR64<I, "f"#I, [!cast<FPR>("F"#I), !cast<FPR>("F_HI"#I)]>,
DwarfRegNum<[!add(I, 32)]>;
+ /// Mips MSA registers
+ /// MSA and FPU cannot both be present unless the FPU has 64-bit registers
+ foreach I = 0-31 in
+ def W#I : AFPR128<I, "w"#I, [!cast<AFPR64>("D"#I#"_64")]>,
+ DwarfRegNum<[!add(I, 32)]>;
+
// Hi/Lo registers
- def HI : Register<"ac0">, DwarfRegNum<[64]>;
- def HI1 : Register<"ac1">, DwarfRegNum<[176]>;
- def HI2 : Register<"ac2">, DwarfRegNum<[178]>;
- def HI3 : Register<"ac3">, DwarfRegNum<[180]>;
- def LO : Register<"ac0">, DwarfRegNum<[65]>;
- def LO1 : Register<"ac1">, DwarfRegNum<[177]>;
- def LO2 : Register<"ac2">, DwarfRegNum<[179]>;
- def LO3 : Register<"ac3">, DwarfRegNum<[181]>;
+ def HI0 : MipsReg<0, "ac0">, DwarfRegNum<[64]>;
+ def HI1 : MipsReg<1, "ac1">, DwarfRegNum<[176]>;
+ def HI2 : MipsReg<2, "ac2">, DwarfRegNum<[178]>;
+ def HI3 : MipsReg<3, "ac3">, DwarfRegNum<[180]>;
+ def LO0 : MipsReg<0, "ac0">, DwarfRegNum<[65]>;
+ def LO1 : MipsReg<1, "ac1">, DwarfRegNum<[177]>;
+ def LO2 : MipsReg<2, "ac2">, DwarfRegNum<[179]>;
+ def LO3 : MipsReg<3, "ac3">, DwarfRegNum<[181]>;
let SubRegIndices = [sub_32] in {
- def HI64 : RegisterWithSubRegs<"hi", [HI]>;
- def LO64 : RegisterWithSubRegs<"lo", [LO]>;
+ def HI0_64 : RegisterWithSubRegs<"hi", [HI0]>;
+ def LO0_64 : RegisterWithSubRegs<"lo", [LO0]>;
}
// FP control registers.
@@ -185,20 +201,22 @@ let Namespace = "Mips" in {
foreach I = 0-7 in
def FCC#I : MipsReg<#I, "fcc"#I>;
+ // COP2 registers.
+ foreach I = 0-31 in
+ def COP2#I : MipsReg<#I, ""#I>;
+
// PC register
def PC : Register<"pc">;
// Hardware register $29
def HWR29 : MipsReg<29, "29">;
- def HWR29_64 : MipsReg<29, "29">;
// Accum registers
- def AC0 : ACC<0, "ac0", [LO, HI]>;
- def AC1 : ACC<1, "ac1", [LO1, HI1]>;
- def AC2 : ACC<2, "ac2", [LO2, HI2]>;
- def AC3 : ACC<3, "ac3", [LO3, HI3]>;
+ foreach I = 0-3 in
+ def AC#I : ACCReg<#I, "ac"#I,
+ [!cast<Register>("LO"#I), !cast<Register>("HI"#I)]>;
- def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
+ def AC0_64 : ACCReg<0, "ac0", [LO0_64, HI0_64]>;
// DSP-ASE control register fields.
def DSPPos : Register<"">;
@@ -217,6 +235,16 @@ let Namespace = "Mips" in {
def DSPOutFlag : RegisterWithSubRegs<"", [DSPOutFlag16_19, DSPOutFlag20,
DSPOutFlag21, DSPOutFlag22,
DSPOutFlag23]>;
+
+ // MSA-ASE control registers.
+ def MSAIR : MipsReg<0, "0">;
+ def MSACSR : MipsReg<1, "1">;
+ def MSAAccess : MipsReg<2, "2">;
+ def MSASave : MipsReg<3, "3">;
+ def MSAModify : MipsReg<4, "4">;
+ def MSARequest : MipsReg<5, "5">;
+ def MSAMap : MipsReg<6, "6">;
+ def MSAUnmap : MipsReg<7, "7">;
}
//===----------------------------------------------------------------------===//
@@ -239,7 +267,7 @@ class GPR32Class<list<ValueType> regTypes> :
K0, K1, GP, SP, FP, RA)>;
def GPR32 : GPR32Class<[i32]>;
-def DSPRegs : GPR32Class<[v4i8, v2i16]>;
+def DSPR : GPR32Class<[v4i8, v2i16]>;
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
// Reserved
@@ -281,6 +309,9 @@ def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// * FGR32 - 32 32-bit registers (single float only mode)
def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
+def FGRH32 : RegisterClass<"Mips", [f32], 32, (sequence "F_HI%u", 0, 31)>,
+ Unallocatable;
+
def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
// Return Values and Arguments
D0, D1,
@@ -303,33 +334,48 @@ def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>,
Unallocatable;
+def MSA128B : RegisterClass<"Mips", [v16i8], 128,
+ (sequence "W%u", 0, 31)>;
+def MSA128H : RegisterClass<"Mips", [v8i16, v8f16], 128,
+ (sequence "W%u", 0, 31)>;
+def MSA128W : RegisterClass<"Mips", [v4i32, v4f32], 128,
+ (sequence "W%u", 0, 31)>;
+def MSA128D : RegisterClass<"Mips", [v2i64, v2f64], 128,
+ (sequence "W%u", 0, 31)>;
+
+def MSACtrl : RegisterClass<"Mips", [i32], 32, (add
+ MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;
+
// Hi/Lo Registers
-def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>;
-def HIRegs : RegisterClass<"Mips", [i32], 32, (add HI)>;
-def LORegsDSP : RegisterClass<"Mips", [i32], 32, (add LO, LO1, LO2, LO3)>;
-def HIRegsDSP : RegisterClass<"Mips", [i32], 32, (add HI, HI1, HI2, HI3)>;
-def LORegs64 : RegisterClass<"Mips", [i64], 64, (add LO64)>;
-def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>;
+def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>;
+def HI32 : RegisterClass<"Mips", [i32], 32, (add HI0)>;
+def LO32DSP : RegisterClass<"Mips", [i32], 32, (sequence "LO%u", 0, 3)>;
+def HI32DSP : RegisterClass<"Mips", [i32], 32, (sequence "HI%u", 0, 3)>;
+def LO64 : RegisterClass<"Mips", [i64], 64, (add LO0_64)>;
+def HI64 : RegisterClass<"Mips", [i64], 64, (add HI0_64)>;
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
-def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
// Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
+def ACC64 : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
let Size = 64;
}
-def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
+def ACC128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
let Size = 128;
}
-def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
+def ACC64DSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
let Size = 64;
}
def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
+// Coprocessor 2 registers.
+def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>,
+ Unallocatable;
+
// Register Operands.
class MipsAsmRegOperand : AsmOperandClass {
@@ -345,9 +391,19 @@ def GPR64AsmOperand : MipsAsmRegOperand {
let ParserMethod = "parseGPR64";
}
-def ACRegsDSPAsmOperand : MipsAsmRegOperand {
- let Name = "ACRegsDSPAsm";
- let ParserMethod = "parseACRegsDSP";
+def ACC64DSPAsmOperand : MipsAsmRegOperand {
+ let Name = "ACC64DSPAsm";
+ let ParserMethod = "parseACC64DSP";
+}
+
+def LO32DSPAsmOperand : MipsAsmRegOperand {
+ let Name = "LO32DSPAsm";
+ let ParserMethod = "parseLO32DSP";
+}
+
+def HI32DSPAsmOperand : MipsAsmRegOperand {
+ let Name = "HI32DSPAsm";
+ let ParserMethod = "parseHI32DSP";
}
def CCRAsmOperand : MipsAsmRegOperand {
@@ -370,11 +426,41 @@ def FGR32AsmOperand : MipsAsmRegOperand {
let ParserMethod = "parseFGR32Regs";
}
+def FGRH32AsmOperand : MipsAsmRegOperand {
+ let Name = "FGRH32Asm";
+ let ParserMethod = "parseFGRH32Regs";
+}
+
def FCCRegsAsmOperand : MipsAsmRegOperand {
let Name = "FCCRegsAsm";
let ParserMethod = "parseFCCRegs";
}
+def MSA128BAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128BAsm";
+ let ParserMethod = "parseMSA128BRegs";
+}
+
+def MSA128HAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128HAsm";
+ let ParserMethod = "parseMSA128HRegs";
+}
+
+def MSA128WAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128WAsm";
+ let ParserMethod = "parseMSA128WRegs";
+}
+
+def MSA128DAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128DAsm";
+ let ParserMethod = "parseMSA128DRegs";
+}
+
+def MSA128CRAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128CRAsm";
+ let ParserMethod = "parseMSA128CtrlRegs";
+}
+
def GPR32Opnd : RegisterOperand<GPR32> {
let ParserMatchClass = GPR32AsmOperand;
}
@@ -383,6 +469,10 @@ def GPR64Opnd : RegisterOperand<GPR64> {
let ParserMatchClass = GPR64AsmOperand;
}
+def DSPROpnd : RegisterOperand<DSPR> {
+ let ParserMatchClass = GPR32AsmOperand;
+}
+
def CCROpnd : RegisterOperand<CCR> {
let ParserMatchClass = CCRAsmOperand;
}
@@ -392,35 +482,68 @@ def HWRegsAsmOperand : MipsAsmRegOperand {
let ParserMethod = "parseHWRegs";
}
-def HW64RegsAsmOperand : MipsAsmRegOperand {
- let Name = "HW64RegsAsm";
- let ParserMethod = "parseHW64Regs";
+def COP2AsmOperand : MipsAsmRegOperand {
+ let Name = "COP2Asm";
+ let ParserMethod = "parseCOP2";
}
def HWRegsOpnd : RegisterOperand<HWRegs> {
let ParserMatchClass = HWRegsAsmOperand;
}
-def HW64RegsOpnd : RegisterOperand<HWRegs64> {
- let ParserMatchClass = HW64RegsAsmOperand;
-}
-
-def AFGR64RegsOpnd : RegisterOperand<AFGR64> {
+def AFGR64Opnd : RegisterOperand<AFGR64> {
let ParserMatchClass = AFGR64AsmOperand;
}
-def FGR64RegsOpnd : RegisterOperand<FGR64> {
+def FGR64Opnd : RegisterOperand<FGR64> {
let ParserMatchClass = FGR64AsmOperand;
}
-def FGR32RegsOpnd : RegisterOperand<FGR32> {
+def FGR32Opnd : RegisterOperand<FGR32> {
let ParserMatchClass = FGR32AsmOperand;
}
+def FGRH32Opnd : RegisterOperand<FGRH32> {
+ let ParserMatchClass = FGRH32AsmOperand;
+}
+
def FCCRegsOpnd : RegisterOperand<FCC> {
let ParserMatchClass = FCCRegsAsmOperand;
}
-def ACRegsDSPOpnd : RegisterOperand<ACRegsDSP> {
- let ParserMatchClass = ACRegsDSPAsmOperand;
+def LO32DSPOpnd : RegisterOperand<LO32DSP> {
+ let ParserMatchClass = LO32DSPAsmOperand;
}
+
+def HI32DSPOpnd : RegisterOperand<HI32DSP> {
+ let ParserMatchClass = HI32DSPAsmOperand;
+}
+
+def ACC64DSPOpnd : RegisterOperand<ACC64DSP> {
+ let ParserMatchClass = ACC64DSPAsmOperand;
+}
+
+def COP2Opnd : RegisterOperand<COP2> {
+ let ParserMatchClass = COP2AsmOperand;
+}
+
+def MSA128BOpnd : RegisterOperand<MSA128B> {
+ let ParserMatchClass = MSA128BAsmOperand;
+}
+
+def MSA128HOpnd : RegisterOperand<MSA128H> {
+ let ParserMatchClass = MSA128HAsmOperand;
+}
+
+def MSA128WOpnd : RegisterOperand<MSA128W> {
+ let ParserMatchClass = MSA128WAsmOperand;
+}
+
+def MSA128DOpnd : RegisterOperand<MSA128D> {
+ let ParserMatchClass = MSA128DAsmOperand;
+}
+
+def MSA128CROpnd : RegisterOperand<MSACtrl> {
+ let ParserMatchClass = MSA128CRAsmOperand;
+}
+
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index d9e0fa4..33ed4b3 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -32,6 +32,21 @@ using namespace llvm;
namespace {
typedef MachineBasicBlock::iterator Iter;
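+/// Return the (MFHI, MFLO) opcode pair used to read the two halves of the
+/// accumulator register \p Src, or (0, 0) if \p Src is not an accumulator.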
+static std::pair<unsigned, unsigned> getMFHiLoOpc(unsigned Src) {
+ if (Mips::ACC64RegClass.contains(Src))
+ return std::make_pair((unsigned)Mips::PseudoMFHI,
+ (unsigned)Mips::PseudoMFLO);
+
+ if (Mips::ACC64DSPRegClass.contains(Src))
+ return std::make_pair((unsigned)Mips::MFHI_DSP, (unsigned)Mips::MFLO_DSP);
+
+ if (Mips::ACC128RegClass.contains(Src))
+ return std::make_pair((unsigned)Mips::PseudoMFHI64,
+ (unsigned)Mips::PseudoMFLO64);
+
+ return std::make_pair(0, 0);
+}
+
/// Helper class to expand pseudos.
class ExpandPseudo {
public:
@@ -43,10 +58,11 @@ private:
void expandLoadCCond(MachineBasicBlock &MBB, Iter I);
void expandStoreCCond(MachineBasicBlock &MBB, Iter I);
void expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
- void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc,
+ unsigned MFLoOpc, unsigned RegSize);
bool expandCopy(MachineBasicBlock &MBB, Iter I);
- bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
- unsigned Src, unsigned RegSize);
+ bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc,
+ unsigned MFLoOpc);
MachineFunction &MF;
MachineRegisterInfo &MRI;
@@ -70,32 +86,26 @@ bool ExpandPseudo::expand() {
bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
switch(I->getOpcode()) {
case Mips::LOAD_CCOND_DSP:
- case Mips::LOAD_CCOND_DSP_P8:
expandLoadCCond(MBB, I);
break;
case Mips::STORE_CCOND_DSP:
- case Mips::STORE_CCOND_DSP_P8:
expandStoreCCond(MBB, I);
break;
- case Mips::LOAD_AC64:
- case Mips::LOAD_AC64_P8:
- case Mips::LOAD_AC_DSP:
- case Mips::LOAD_AC_DSP_P8:
+ case Mips::LOAD_ACC64:
+ case Mips::LOAD_ACC64DSP:
expandLoadACC(MBB, I, 4);
break;
- case Mips::LOAD_AC128:
- case Mips::LOAD_AC128_P8:
+ case Mips::LOAD_ACC128:
expandLoadACC(MBB, I, 8);
break;
- case Mips::STORE_AC64:
- case Mips::STORE_AC64_P8:
- case Mips::STORE_AC_DSP:
- case Mips::STORE_AC_DSP_P8:
- expandStoreACC(MBB, I, 4);
+ case Mips::STORE_ACC64:
+ expandStoreACC(MBB, I, Mips::PseudoMFHI, Mips::PseudoMFLO, 4);
+ break;
+ case Mips::STORE_ACC64DSP:
+ expandStoreACC(MBB, I, Mips::MFHI_DSP, Mips::MFLO_DSP, 4);
break;
- case Mips::STORE_AC128:
- case Mips::STORE_AC128_P8:
- expandStoreACC(MBB, I, 8);
+ case Mips::STORE_ACC128:
+ expandStoreACC(MBB, I, Mips::PseudoMFHI64, Mips::PseudoMFLO64, 8);
break;
case TargetOpcode::COPY:
if (!expandCopy(MBB, I))
@@ -179,10 +189,11 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I,
}
void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
+ unsigned MFHiOpc, unsigned MFLoOpc,
unsigned RegSize) {
- // copy $vr0, lo
+ // mflo $vr0, src
// store $vr0, FI
- // copy $vr1, hi
+ // mfhi $vr1, src
// store $vr1, FI + 4
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
@@ -197,33 +208,29 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
unsigned VR1 = MRI.createVirtualRegister(RC);
unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
unsigned SrcKill = getKillRegState(I->getOperand(0).isKill());
- unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo);
- unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi);
DebugLoc DL = I->getDebugLoc();
- BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src);
TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0);
- BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(MFHiOpc), VR1).addReg(Src, SrcKill);
TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
}
bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) {
- unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
-
- if (Mips::ACRegsDSPRegClass.contains(Dst, Src))
- return expandCopyACC(MBB, I, Dst, Src, 4);
+ unsigned Src = I->getOperand(1).getReg();
+ std::pair<unsigned, unsigned> Opcodes = getMFHiLoOpc(Src);
- if (Mips::ACRegs128RegClass.contains(Dst, Src))
- return expandCopyACC(MBB, I, Dst, Src, 8);
+ if (!Opcodes.first)
+ return false;
- return false;
+ return expandCopyACC(MBB, I, Opcodes.first, Opcodes.second);
}
-bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
- unsigned Src, unsigned RegSize) {
- // copy $vr0, src_lo
+bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
+ unsigned MFHiOpc, unsigned MFLoOpc) {
+ // mflo $vr0, src
// copy dst_lo, $vr0
- // copy $vr1, src_hi
+ // mfhi $vr1, src
// copy dst_hi, $vr1
const MipsSEInstrInfo &TII =
@@ -231,20 +238,20 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
const MipsRegisterInfo &RegInfo =
*static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+ unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2;
+ const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
unsigned VR1 = MRI.createVirtualRegister(RC);
unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
- unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo);
- unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi);
DebugLoc DL = I->getDebugLoc();
- BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src);
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo)
.addReg(VR0, RegState::Kill);
- BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(MFHiOpc), VR1).addReg(Src, SrcKill);
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
.addReg(VR1, RegState::Kill);
return true;
@@ -321,9 +328,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
// one for each of the paired single precision registers.
if (Mips::AFGR64RegClass.contains(Reg)) {
unsigned Reg0 =
- MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpeven), true);
+ MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true);
unsigned Reg1 =
- MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpodd), true);
+ MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true);
if (!STI.isLittle())
std::swap(Reg0, Reg1);
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 193a66c..8fa9e46 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -21,7 +21,7 @@ namespace llvm {
class MipsSEFrameLowering : public MipsFrameLowering {
public:
explicit MipsSEFrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {}
+ : MipsFrameLowering(STI, STI.stackAlignment()) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 3b6480a..737660e 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -66,6 +66,21 @@ void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MIB.addReg(Mips::DSPEFI, Flag);
}
+unsigned MipsSEDAGToDAGISel::getMSACtrlReg(const SDValue RegIdx) const {
+ switch (cast<ConstantSDNode>(RegIdx)->getZExtValue()) {
+ default:
+ llvm_unreachable("Could not map int to register");
+ case 0: return Mips::MSAIR;
+ case 1: return Mips::MSACSR;
+ case 2: return Mips::MSAAccess;
+ case 3: return Mips::MSASave;
+ case 4: return Mips::MSAModify;
+ case 5: return Mips::MSARequest;
+ case 6: return Mips::MSAMap;
+ case 7: return Mips::MSAUnmap;
+ }
+}
+
bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
const MachineInstr& MI) {
unsigned DstReg = 0, ZeroReg = 0;
@@ -301,6 +316,20 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
return false;
}
+/// ComplexPattern used in MipsInstrInfo.
+/// Used on Mips load/store instructions.
+bool MipsSEDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ return false;
+}
+
bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
Base = Addr;
@@ -314,6 +343,263 @@ bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
selectAddrDefault(Addr, Base, Offset);
}
+/// Used on microMIPS Load/Store unaligned instructions (12-bit offset)
+bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<12>(CN->getSExtValue())) {
+
+ // If the first operand is a FI then get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm12(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
+// Select constant vector splats.
+//
+// Returns true and sets Imm if:
+// * MSA is enabled
+// * N is an ISD::BUILD_VECTOR representing a constant splat
+bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
+ if (!Subtarget.hasMSA())
+ return false;
+
+ BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
+
+ if (Node == NULL)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8,
+ !Subtarget.isLittle()))
+ return false;
+
+ Imm = SplatValue;
+
+ return true;
+}
+
+// Select constant vector splats.
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value fits in an integer with the specified signed-ness and
+// width.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+//
+// It's worth noting that this function is not used as part of the selection
+// of ldi.[bhwd] since it does not permit using the wrong-typed ldi.[bhwd]
+// instruction to achieve the desired bit pattern. ldi.[bhwd] is selected in
+// MipsSEDAGToDAGISel::selectNode.
+bool MipsSEDAGToDAGISel::
+selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
+ unsigned ImmBitSize) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat (N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ if (( Signed && ImmValue.isSignedIntN(ImmBitSize)) ||
+ (!Signed && ImmValue.isIntN(ImmBitSize))) {
+ Imm = CurDAG->getTargetConstant(ImmValue, EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
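For illustration, a minimal standalone sketch of the width check that selectVSplatCommon performs, using plain integers rather than APInt; the helper name fitsImm is made up for this sketch and mirrors APInt::isSignedIntN / APInt::isIntN:

    #include <cstdint>
    #include <cstdio>

    // Does Val fit in an ImmBitSize-bit immediate of the requested signedness?
    static bool fitsImm(int64_t Val, unsigned ImmBitSize, bool Signed) {
      if (Signed) {
        int64_t Min = -(INT64_C(1) << (ImmBitSize - 1));
        int64_t Max = (INT64_C(1) << (ImmBitSize - 1)) - 1;
        return Val >= Min && Val <= Max;
      }
      return Val >= 0 && Val < (INT64_C(1) << ImmBitSize);
    }

    int main() {
      printf("%d\n", fitsImm(-1, 5, true));  // 1: -1 is a valid simm5 splat
      printf("%d\n", fitsImm(-1, 5, false)); // 0: -1 is not a valid uimm5 splat
      printf("%d\n", fitsImm(31, 5, false)); // 1: 31 fits in uimm5
      printf("%d\n", fitsImm(31, 5, true));  // 0: 31 does not fit in simm5
      return 0;
    }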
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 1);
+}
+
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 2);
+}
+
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm3(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 3);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm4(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 4);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm5(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 5);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm6(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 6);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm8(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 8);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatSimm5(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, true, 5);
+}
+
+// Select constant vector splats whose value is a power of 2.
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a power of two.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool MipsSEDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat (N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ int32_t Log2 = ImmValue.exactLogBase2();
+
+ if (Log2 != -1) {
+ Imm = CurDAG->getTargetConstant(Log2, EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Select constant vector splats whose value only has a consecutive sequence
+// of left-most bits set (e.g. 0b11...1100...00).
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a consecutive sequence of left-most bits.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ // Extract the run of set bits starting with bit zero from the bitwise
+ // inverse of ImmValue, and test that the inverse of this is the same
+ // as the original value.
+ if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) {
+
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Select constant vector splats whose value only has a consecutive sequence
+// of right-most bits set (e.g. 0b00...0011...11).
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a consecutive sequence of right-most bits.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ // Extract the run of set bits starting with bit zero, and test that the
+ // result is the same as the original value
+ if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
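The two bit tricks used by selectVSplatMaskL and selectVSplatMaskR are compact but easy to misread; here is a small standalone check, on ordinary uint32_t values outside LLVM, of what each expression accepts:

    #include <cstdint>
    #include <cstdio>

    // "Left mask": a run of set bits ending at the most significant bit,
    // e.g. 0b111...1100...00. Same test as selectVSplatMaskL.
    static bool isLeftMask(uint32_t X)  { return X == ~(~X & ~(~X + 1u)); }

    // "Right mask": a run of set bits starting at bit zero,
    // e.g. 0b000...0011...11. Same test as selectVSplatMaskR.
    static bool isRightMask(uint32_t X) { return X == (X & ~(X + 1u)); }

    int main() {
      printf("%d %d\n", isLeftMask(0xFFFF0000u), isRightMask(0xFFFF0000u)); // 1 0
      printf("%d %d\n", isLeftMask(0x000000FFu), isRightMask(0x000000FFu)); // 0 1
      printf("%d %d\n", isLeftMask(0x0FF00000u), isRightMask(0x0FF00000u)); // 0 0
      return 0;
    }

In both accepted cases the immediate that is actually emitted is the population count of the splat value, i.e. the length of the run.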
+bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
+ SDValue &Imm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ int32_t Log2 = (~ImmValue).exactLogBase2();
+
+ if (Log2 != -1) {
+ Imm = CurDAG->getTargetConstant(Log2, EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
SDLoc DL(Node);
@@ -348,6 +634,11 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
Mips::ZERO_64, MVT::i64);
Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else if (Subtarget.isFP64bit()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64_64, DL, MVT::f64,
+ Zero, Zero);
} else {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
Mips::ZERO, MVT::i32);
@@ -401,24 +692,71 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
return std::make_pair(true, RegOpnd);
}
+ case ISD::INTRINSIC_W_CHAIN: {
+ switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+
+ case Intrinsic::mips_cfcmsa: {
+ SDValue ChainIn = Node->getOperand(0);
+ SDValue RegIdx = Node->getOperand(2);
+ SDValue Reg = CurDAG->getCopyFromReg(ChainIn, DL,
+ getMSACtrlReg(RegIdx), MVT::i32);
+ return std::make_pair(true, Reg.getNode());
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue()) {
+ default:
+ break;
+
+ case Intrinsic::mips_move_v:
+ // Like an assignment but will always produce a move.v even if
+ // unnecessary.
+ return std::make_pair(true,
+ CurDAG->getMachineNode(Mips::MOVE_V, DL,
+ Node->getValueType(0),
+ Node->getOperand(1)));
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_VOID: {
+ switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+
+ case Intrinsic::mips_ctcmsa: {
+ SDValue ChainIn = Node->getOperand(0);
+ SDValue RegIdx = Node->getOperand(2);
+ SDValue Value = Node->getOperand(3);
+ SDValue ChainOut = CurDAG->getCopyToReg(ChainIn, DL,
+ getMSACtrlReg(RegIdx), Value);
+ return std::make_pair(true, ChainOut.getNode());
+ }
+ }
+ break;
+ }
+
case MipsISD::ThreadPointer: {
EVT PtrVT = getTargetLowering()->getPointerTy();
- unsigned RdhwrOpc, SrcReg, DestReg;
+ unsigned RdhwrOpc, DestReg;
if (PtrVT == MVT::i32) {
RdhwrOpc = Mips::RDHWR;
- SrcReg = Mips::HWR29;
DestReg = Mips::V1;
} else {
RdhwrOpc = Mips::RDHWR64;
- SrcReg = Mips::HWR29_64;
DestReg = Mips::V1_64;
}
SDNode *Rdhwr =
CurDAG->getMachineNode(RdhwrOpc, SDLoc(Node),
Node->getValueType(0),
- CurDAG->getRegister(SrcReg, PtrVT));
+ CurDAG->getRegister(Mips::HWR29, MVT::i32));
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
SDValue(Rdhwr, 0));
SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
@@ -426,18 +764,81 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
return std::make_pair(true, ResNode.getNode());
}
- case MipsISD::InsertLOHI: {
- unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID :
- Mips::ACRegsRegClassID;
- SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32);
- SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32);
- SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32);
- const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
- Node->getOperand(1), HiIdx };
- SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- MVT::Untyped, Ops);
+ case ISD::BUILD_VECTOR: {
+ // Select appropriate ldi.[bhwd] instructions for constant splats of
+ // 128-bit when MSA is enabled. Fixup any register class mismatches that
+ // occur as a result.
+ //
+ // This allows the compiler to use a wider range of immediates than would
+ // otherwise be allowed. If, for example, v4i32 could only use ldi.h then
+ // it would not be possible to load { 0x01010101, 0x01010101, 0x01010101,
+ // 0x01010101 } without using a constant pool. This would be sub-optimal
+ // when 'ldi.b wd, 1' is capable of producing that bit-pattern in the
+ // same set of registers. Similarly, ldi.h isn't capable of producing {
+ // 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can.
+
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned LdiOp;
+ EVT ResVecTy = BVN->getValueType(0);
+ EVT ViaVecTy;
+
+ if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector())
+ return std::make_pair(false, (SDNode*)NULL);
+
+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8,
+ !Subtarget.isLittle()))
+ return std::make_pair(false, (SDNode*)NULL);
+
+ switch (SplatBitSize) {
+ default:
+ return std::make_pair(false, (SDNode*)NULL);
+ case 8:
+ LdiOp = Mips::LDI_B;
+ ViaVecTy = MVT::v16i8;
+ break;
+ case 16:
+ LdiOp = Mips::LDI_H;
+ ViaVecTy = MVT::v8i16;
+ break;
+ case 32:
+ LdiOp = Mips::LDI_W;
+ ViaVecTy = MVT::v4i32;
+ break;
+ case 64:
+ LdiOp = Mips::LDI_D;
+ ViaVecTy = MVT::v2i64;
+ break;
+ }
+
+ if (!SplatValue.isSignedIntN(10))
+ return std::make_pair(false, (SDNode*)NULL);
+
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue,
+ ViaVecTy.getVectorElementType());
+
+ SDNode *Res = CurDAG->getMachineNode(LdiOp, SDLoc(Node), ViaVecTy, Imm);
+
+ if (ResVecTy != ViaVecTy) {
+ // If LdiOp is writing to a different register class to ResVecTy, then
+ // fix it up here. This COPY_TO_REGCLASS should never cause a move.v
+ // since the source and destination register sets contain the same
+ // registers.
+ const TargetLowering *TLI = getTargetLowering();
+ MVT ResVecTySimple = ResVecTy.getSimpleVT();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple);
+ Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, SDLoc(Node),
+ ResVecTy, SDValue(Res, 0),
+ CurDAG->getTargetConstant(RC->getID(),
+ MVT::i32));
+ }
+
return std::make_pair(true, Res);
}
+
}
return std::make_pair(false, (SDNode*)NULL);
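To make the BUILD_VECTOR case above concrete, the following standalone sketch (a rough approximation, not the LLVM API) computes the smallest repeating chunk width of a 32-bit lane value, which is essentially what isConstantSplat reports as SplatBitSize for a v4i32 splat and hence which ldi.[bhwd] variant gets picked:

    #include <cstdint>
    #include <cstdio>

    // Smallest power-of-two chunk width (in bits) whose repetition rebuilds the
    // 32-bit lane value; this decides between LDI_B/LDI_H/LDI_W for v4i32 splats.
    static unsigned minSplatBits(uint32_t Lane) {
      for (unsigned Bits = 8; Bits < 32; Bits *= 2) {
        uint32_t Chunk = Lane & ((1u << Bits) - 1);
        uint32_t Rebuilt = 0;
        for (unsigned I = 0; I < 32; I += Bits)
          Rebuilt |= Chunk << I;
        if (Rebuilt == Lane)
          return Bits;
      }
      return 32;
    }

    int main() {
      printf("%u\n", minSplatBits(0x01010101u)); // 8  -> ldi.b wd, 1
      printf("%u\n", minSplatBits(0x00010001u)); // 16 -> ldi.h wd, 1
      printf("%u\n", minSplatBits(0x00000001u)); // 32 -> ldi.w wd, 1
      return 0;
    }

The 64-bit case (ldi.d), the 10-bit signed immediate limit, and the COPY_TO_REGCLASS fixup for mismatched result types are handled separately in the code above.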
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 03ed1f9..dc52064 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -30,6 +30,8 @@ private:
void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF);
+ unsigned getMSACtrlReg(const SDValue RegIdx) const;
+
bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, SDLoc dl,
@@ -41,12 +43,54 @@ private:
virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ virtual bool selectAddrRegImm12(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// \brief Select constant vector splats.
+ virtual bool selectVSplat(SDNode *N, APInt &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a given integer.
+ virtual bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
+ unsigned ImmBitSize) const;
+ /// \brief Select constant vector splats whose value fits in a uimm1.
+ virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm2.
+ virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm3.
+ virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm4.
+ virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm5.
+ virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm6.
+ virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm8.
+ virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a simm5.
+ virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a power of 2.
+ virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is the inverse of a
+ /// power of 2.
+ virtual bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a run of set bits
+ /// ending at the most significant bit.
+ virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a run of set bits
+ /// starting at bit zero.
+ virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
+
virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
virtual void processFunctionAfterISel(MachineFunction &MF);
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index a0aacb5..809adc0 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -10,6 +10,7 @@
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-isel"
#include "MipsSEISelLowering.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
@@ -17,6 +18,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -25,22 +28,40 @@ static cl::opt<bool>
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
cl::desc("MIPS: Enable tail calls."), cl::init(false));
+static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
+ cl::desc("Expand double precision loads and "
+ "stores to their single precision "
+ "counterparts"));
+
MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
: MipsTargetLowering(TM) {
// Set up the register classes
-
- clearRegisterClasses();
-
addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
if (HasMips64)
addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
+ if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
+ // Expand all truncating stores and extending loads.
+ unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+
+ for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
+ for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
+ setTruncStoreAction((MVT::SimpleValueType)VT0,
+ (MVT::SimpleValueType)VT1, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
+ }
+ }
+
if (Subtarget->hasDSP()) {
MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+ addRegisterClass(VecTys[i], &Mips::DSPRRegClass);
// Expand all builtin opcodes.
for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
@@ -53,20 +74,6 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BITCAST, VecTys[i], Legal);
}
- // Expand all truncating stores and extending loads.
- unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-
- for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
- for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
- setTruncStoreAction((MVT::SimpleValueType)VT0,
- (MVT::SimpleValueType)VT1, Expand);
-
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
- }
-
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
@@ -77,12 +84,28 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
if (Subtarget->hasDSPR2())
setOperationAction(ISD::MUL, MVT::v2i16, Legal);
- if (!TM.Options.UseSoftFloat) {
+ if (Subtarget->hasMSA()) {
+ addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
+ addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
+ addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
+ addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
+ addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
+ addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
+ addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine(ISD::XOR);
+ }
+
+ if (!Subtarget->mipsSEUsesSoftFloat()) {
addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
// When dealing with single precision only, use libcalls
if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
+ if (Subtarget->isFP64bit())
addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
else
addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
@@ -115,6 +138,15 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::MUL);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
+ if (NoDPLoadStore) {
+ setOperationAction(ISD::LOAD, MVT::f64, Custom);
+ setOperationAction(ISD::STORE, MVT::f64, Custom);
+ }
+
computeRegisterProperties();
}
@@ -123,6 +155,93 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
return new MipsSETargetLowering(TM);
}
+// Enable MSA support for the given integer type and Register class.
+void MipsSETargetLowering::
+addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
+ addRegisterClass(Ty, RC);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, Ty, Expand);
+
+ setOperationAction(ISD::BITCAST, Ty, Legal);
+ setOperationAction(ISD::LOAD, Ty, Legal);
+ setOperationAction(ISD::STORE, Ty, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
+
+ setOperationAction(ISD::ADD, Ty, Legal);
+ setOperationAction(ISD::AND, Ty, Legal);
+ setOperationAction(ISD::CTLZ, Ty, Legal);
+ setOperationAction(ISD::CTPOP, Ty, Legal);
+ setOperationAction(ISD::MUL, Ty, Legal);
+ setOperationAction(ISD::OR, Ty, Legal);
+ setOperationAction(ISD::SDIV, Ty, Legal);
+ setOperationAction(ISD::SREM, Ty, Legal);
+ setOperationAction(ISD::SHL, Ty, Legal);
+ setOperationAction(ISD::SRA, Ty, Legal);
+ setOperationAction(ISD::SRL, Ty, Legal);
+ setOperationAction(ISD::SUB, Ty, Legal);
+ setOperationAction(ISD::UDIV, Ty, Legal);
+ setOperationAction(ISD::UREM, Ty, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
+ setOperationAction(ISD::VSELECT, Ty, Legal);
+ setOperationAction(ISD::XOR, Ty, Legal);
+
+ if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
+ setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
+ setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
+ setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
+ setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
+ }
+
+ setOperationAction(ISD::SETCC, Ty, Legal);
+ setCondCodeAction(ISD::SETNE, Ty, Expand);
+ setCondCodeAction(ISD::SETGE, Ty, Expand);
+ setCondCodeAction(ISD::SETGT, Ty, Expand);
+ setCondCodeAction(ISD::SETUGE, Ty, Expand);
+ setCondCodeAction(ISD::SETUGT, Ty, Expand);
+}
+
+// Enable MSA support for the given floating-point type and Register class.
+void MipsSETargetLowering::
+addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
+ addRegisterClass(Ty, RC);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, Ty, Expand);
+
+ setOperationAction(ISD::LOAD, Ty, Legal);
+ setOperationAction(ISD::STORE, Ty, Legal);
+ setOperationAction(ISD::BITCAST, Ty, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
+
+ if (Ty != MVT::v8f16) {
+ setOperationAction(ISD::FABS, Ty, Legal);
+ setOperationAction(ISD::FADD, Ty, Legal);
+ setOperationAction(ISD::FDIV, Ty, Legal);
+ setOperationAction(ISD::FEXP2, Ty, Legal);
+ setOperationAction(ISD::FLOG2, Ty, Legal);
+ setOperationAction(ISD::FMA, Ty, Legal);
+ setOperationAction(ISD::FMUL, Ty, Legal);
+ setOperationAction(ISD::FRINT, Ty, Legal);
+ setOperationAction(ISD::FSQRT, Ty, Legal);
+ setOperationAction(ISD::FSUB, Ty, Legal);
+ setOperationAction(ISD::VSELECT, Ty, Legal);
+
+ setOperationAction(ISD::SETCC, Ty, Legal);
+ setCondCodeAction(ISD::SETOGE, Ty, Expand);
+ setCondCodeAction(ISD::SETOGT, Ty, Expand);
+ setCondCodeAction(ISD::SETUGE, Ty, Expand);
+ setCondCodeAction(ISD::SETUGT, Ty, Expand);
+ setCondCodeAction(ISD::SETGE, Ty, Expand);
+ setCondCodeAction(ISD::SETGT, Ty, Expand);
+ }
+}
bool
MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
@@ -142,6 +261,8 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch(Op.getOpcode()) {
+ case ISD::LOAD: return lowerLOAD(Op, DAG);
+ case ISD::STORE: return lowerSTORE(Op, DAG);
case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
@@ -152,6 +273,10 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
DAG);
case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
}
return MipsTargetLowering::LowerOperation(Op, DAG);
@@ -204,7 +329,7 @@ static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
SDLoc DL(ADDENode);
// Initialize accumulator.
- SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
ADDCNode->getOperand(1),
ADDENode->getOperand(1));
@@ -218,15 +343,11 @@ static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
// replace uses of adde and addc here
if (!SDValue(ADDCNode, 0).use_empty()) {
- SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
- SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
- LoIdx);
+ SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
}
if (!SDValue(ADDENode, 0).use_empty()) {
- SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
- SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
- HiIdx);
+ SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
}
@@ -280,7 +401,7 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
SDLoc DL(SUBENode);
// Initialize accumulator.
- SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
SUBCNode->getOperand(0),
SUBENode->getOperand(0));
@@ -294,15 +415,11 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
// replace uses of sube and subc here
if (!SDValue(SUBCNode, 0).use_empty()) {
- SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
- SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
- LoIdx);
+ SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
}
if (!SDValue(SUBENode, 0).use_empty()) {
- SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
- SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
- HiIdx);
+ SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
}
@@ -322,6 +439,248 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
+//
+// Performs the following transformations:
+// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
+// sign/zero-extension is completely overwritten by the new one performed by
+// the ISD::AND.
+// - Removes redundant zero extensions performed by an ISD::AND.
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (!Subtarget->hasMSA())
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned Op0Opcode = Op0->getOpcode();
+
+ // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
+ // where $d + 1 == 2^n and n == 32
+ // or $d + 1 == 2^n and n <= 32 and ZExt
+ // -> (MipsVExtractZExt $a, $b, $c)
+ if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
+ Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);
+
+ if (!Mask)
+ return SDValue();
+
+ int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
+
+ if (Log2IfPositive <= 0)
+ return SDValue(); // Mask+1 is not a power of 2
+
+ SDValue Op0Op2 = Op0->getOperand(2);
+ EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
+ unsigned ExtendTySize = ExtendTy.getSizeInBits();
+ unsigned Log2 = Log2IfPositive;
+
+ if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
+ Log2 == ExtendTySize) {
+ SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
+ DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
+ Op0->getVTList(), Ops, Op0->getNumOperands());
+ return Op0;
+ }
+ }
+
+ return SDValue();
+}
+
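A scalar analogue of the first transformation above, as a standalone check in plain C++ with nothing LLVM-specific: masking a sign-extended element with 2^n - 1, where n covers the extended width, is the same as zero-extending the element in the first place.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t Elt = -3; // an i8 element extracted from a vector
      // (and (sext i8 -> i32), 0xFF) ...
      uint32_t Masked = (uint32_t)(int32_t)Elt & 0xFFu;
      // ... equals (zext i8 -> i32), i.e. what VEXTRACT_ZEXT_ELT produces.
      uint32_t ZExt = (uint32_t)(uint8_t)Elt;
      printf("%u %u\n", Masked, ZExt); // 253 253
      return 0;
    }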
+// Determine if the specified node is a constant vector splat.
+//
+// Returns true and sets Imm if:
+// * N is an ISD::BUILD_VECTOR representing a constant splat
+//
+// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
+// differences are that it assumes the MSA has already been checked and the
+// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
+// must not be in order for binsri.d to be selectable).
+static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
+ BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());
+
+ if (Node == NULL)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ 8, !IsLittleEndian))
+ return false;
+
+ Imm = SplatValue;
+
+ return true;
+}
+
+// Test whether the given node is an all-ones build_vector.
+static bool isVectorAllOnes(SDValue N) {
+ // Look through bitcasts. Endianness doesn't matter because we are looking
+ // for an all-ones value.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
+
+ if (!BVN)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ // Endianness doesn't matter in this context because we are looking for
+ // an all-ones value.
+ if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
+ return SplatValue.isAllOnesValue();
+
+ return false;
+}
+
+// Test whether N is the bitwise inverse of OfNode.
+static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
+ if (N->getOpcode() != ISD::XOR)
+ return false;
+
+ if (isVectorAllOnes(N->getOperand(0)))
+ return N->getOperand(1) == OfNode;
+
+ if (isVectorAllOnes(N->getOperand(1)))
+ return N->getOperand(0) == OfNode;
+
+ return false;
+}
+
+// Perform combines where ISD::OR is the root node.
+//
+// Performs the following transformations:
+// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
+// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
+// vector type.
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (!Subtarget->hasMSA())
+ return SDValue();
+
+ EVT Ty = N->getValueType(0);
+
+ if (!Ty.is128BitVector())
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
+ SDValue Op0Op0 = Op0->getOperand(0);
+ SDValue Op0Op1 = Op0->getOperand(1);
+ SDValue Op1Op0 = Op1->getOperand(0);
+ SDValue Op1Op1 = Op1->getOperand(1);
+ bool IsLittleEndian = !Subtarget->isLittle();
+
+ SDValue IfSet, IfClr, Cond;
+ bool IsConstantMask = false;
+ APInt Mask, InvMask;
+
+ // If Op0Op0 is an appropriate mask, try to find its inverse in either
+ // Op1Op0 or Op1Op1, keeping track of the Cond, IfSet, and IfClr nodes
+ // while looking.
+ // IfClr will be set if we find a valid match.
+ if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
+ Cond = Op0Op0;
+ IfSet = Op0Op1;
+
+ if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
+ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+ IfClr = Op1Op1;
+ else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
+ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+ IfClr = Op1Op0;
+
+ IsConstantMask = true;
+ }
+
+ // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
+ // thing again using this mask.
+ // IfClr will be set if we find a valid match.
+ if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
+ Cond = Op0Op1;
+ IfSet = Op0Op0;
+
+ if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
+ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+ IfClr = Op1Op1;
+ else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
+ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+ IfClr = Op1Op0;
+
+ IsConstantMask = true;
+ }
+
+ // If IfClr is not yet set, try looking for a non-constant match.
+ // IfClr will be set if we find a valid match amongst the eight
+ // possibilities.
+ if (!IfClr.getNode()) {
+ if (isBitwiseInverse(Op0Op0, Op1Op0)) {
+ Cond = Op1Op0;
+ IfSet = Op1Op1;
+ IfClr = Op0Op1;
+ } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
+ Cond = Op1Op0;
+ IfSet = Op1Op1;
+ IfClr = Op0Op0;
+ } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
+ Cond = Op1Op1;
+ IfSet = Op1Op0;
+ IfClr = Op0Op1;
+ } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
+ Cond = Op1Op1;
+ IfSet = Op1Op0;
+ IfClr = Op0Op0;
+ } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
+ Cond = Op0Op0;
+ IfSet = Op0Op1;
+ IfClr = Op1Op1;
+ } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
+ Cond = Op0Op0;
+ IfSet = Op0Op1;
+ IfClr = Op1Op0;
+ } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
+ Cond = Op0Op1;
+ IfSet = Op0Op0;
+ IfClr = Op1Op1;
+ } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
+ Cond = Op0Op1;
+ IfSet = Op0Op0;
+ IfClr = Op1Op0;
+ }
+ }
+
+ // At this point, IfClr will be set if we have a valid match.
+ if (!IfClr.getNode())
+ return SDValue();
+
+ assert(Cond.getNode() && IfSet.getNode());
+
+ // Fold degenerate cases.
+ if (IsConstantMask) {
+ if (Mask.isAllOnesValue())
+ return IfSet;
+ else if (Mask == 0)
+ return IfClr;
+ }
+
+ // Transform the DAG into an equivalent VSELECT.
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfClr, IfSet);
+ }
+
+ return SDValue();
+}
+
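The rewrite in performORCombine relies on the usual bitwise-select identity; here is the scalar version as a quick sanity check (standalone C++, not tied to the LLVM APIs; MSA applies this per element):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t A = 0x12345678, B = 0x9ABCDEF0, Mask = 0x00FF00FF;

      // (or (and $a, $mask), (and $b, $inv_mask)) -- the pattern being matched.
      uint32_t OrForm = (A & Mask) | (B & ~Mask);

      // Bit-by-bit select: A where Mask is set, B where it is clear. This is
      // what (vselect $mask, $a, $b) expresses for the constant-mask case.
      uint32_t Select = 0;
      for (unsigned Bit = 0; Bit < 32; ++Bit) {
        uint32_t M = 1u << Bit;
        Select |= (Mask & M) ? (A & M) : (B & M);
      }

      printf("%s\n", OrForm == Select ? "equal" : "different"); // equal
      return 0;
    }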
static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
@@ -396,6 +755,9 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ if (!Subtarget->hasDSP())
+ return SDValue();
+
if (!BV ||
!BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
EltSize, !Subtarget->isLittle()) ||
@@ -418,11 +780,57 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}
+// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
+// constant splats into MipsISD::SHRA_DSP for DSPr2.
+//
+// Performs the following transformations:
+// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
+// sign/zero-extension is completely overwritten by the new one performed by
+// the ISD::SRA and ISD::SHL nodes.
+// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
+// sequence.
+//
+// See performDSPShiftCombine for more information about the transformation
+// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
EVT Ty = N->getValueType(0);
+ if (Subtarget->hasMSA()) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
+ // where $d + sizeof($c) == 32
+ // or $d + sizeof($c) <= 32 and SExt
+ // -> (MipsVExtractSExt $a, $b, $c)
+ if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
+ SDValue Op0Op0 = Op0->getOperand(0);
+ ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);
+
+ if (!ShAmount)
+ return SDValue();
+
+ if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
+ Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
+ return SDValue();
+
+ EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
+ unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
+
+ if (TotalBits == 32 ||
+ (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
+ TotalBits <= 32)) {
+ SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
+ Op0Op0->getOperand(2) };
+ DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT,
+ Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands());
+ return Op0Op0;
+ }
+ }
+ }
+
if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
return SDValue();
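A scalar analogue of the (sra (shl X, d), d) pattern folded above: when d plus the original element width equals the register width, the shift pair is exactly a sign extension of the extracted element. A standalone check (two's-complement arithmetic assumed):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t Elt = 0xF0;  // an i8 element with its sign bit set

      // (sra (shl X, 24), 24) with 24 + 8 == 32 ...
      int32_t ShiftPair = (int32_t)((uint32_t)Elt << 24) >> 24;

      // ... is the same as sign-extending the element directly,
      // i.e. what VEXTRACT_SEXT_ELT produces.
      int32_t SExt = (int8_t)Elt;

      printf("%d %d\n", ShiftPair, SExt); // -16 -16
      return 0;
    }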
@@ -475,17 +883,84 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
- if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
- return SDValue();
+ if (Ty.is128BitVector() && Ty.isInteger()) {
+ // Try the following combines:
+ // (vselect (setcc $a, $b, SETLT), $b, $a) -> (vsmax $a, $b)
+ // (vselect (setcc $a, $b, SETLE), $b, $a) -> (vsmax $a, $b)
+ // (vselect (setcc $a, $b, SETLT), $a, $b) -> (vsmin $a, $b)
+ // (vselect (setcc $a, $b, SETLE), $a, $b) -> (vsmin $a, $b)
+ // (vselect (setcc $a, $b, SETULT), $b, $a) -> (vumax $a, $b)
+ // (vselect (setcc $a, $b, SETULE), $b, $a) -> (vumax $a, $b)
+ // (vselect (setcc $a, $b, SETULT), $a, $b) -> (vumin $a, $b)
+ // (vselect (setcc $a, $b, SETULE), $a, $b) -> (vumin $a, $b)
+ // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but
+ // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the
+ // legalizer.
+ SDValue Op0 = N->getOperand(0);
+
+ if (Op0->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get();
+ bool Signed;
+
+ if (CondCode == ISD::SETLT || CondCode == ISD::SETLE)
+ Signed = true;
+ else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE)
+ Signed = false;
+ else
+ return SDValue();
+
+ SDValue Op1 = N->getOperand(1);
+ SDValue Op2 = N->getOperand(2);
+ SDValue Op0Op0 = Op0->getOperand(0);
+ SDValue Op0Op1 = Op0->getOperand(1);
+
+ if (Op1 == Op0Op0 && Op2 == Op0Op1)
+ return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N),
+ Ty, Op1, Op2);
+ else if (Op1 == Op0Op1 && Op2 == Op0Op0)
+ return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N),
+ Ty, Op1, Op2);
+ } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) {
+ SDValue SetCC = N->getOperand(0);
+
+ if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
+ SetCC.getOperand(0), SetCC.getOperand(1),
+ N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
+ }
- SDValue SetCC = N->getOperand(0);
+ return SDValue();
+}
- if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
- return SDValue();
+static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
+ const MipsSubtarget *Subtarget) {
+ EVT Ty = N->getValueType(0);
- return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
- SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1),
- N->getOperand(2), SetCC.getOperand(2));
+ if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
+ // Try the following combines:
+ // (xor (or $a, $b), (build_vector allones)) -> (vnor $a, $b)
+ // (xor (or $a, $b), (bitcast (build_vector allones))) -> (vnor $a, $b)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue NotOp;
+
+ if (ISD::isBuildVectorAllOnes(Op0.getNode()))
+ NotOp = Op1;
+ else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+ NotOp = Op0;
+ else
+ return SDValue();
+
+ if (NotOp->getOpcode() == ISD::OR)
+ return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
+ NotOp->getOperand(1));
+ }
+
+ return SDValue();
}
SDValue
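Scalar versions of the two rewrites added above (the vselect min/max combine and the xor-with-allones combine), as a quick standalone sanity check; MSA applies these element-wise:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t A = -5, B = 7;

      // (vselect (setcc $a, $b, SETLT), $b, $a) -> max($a, $b)
      int32_t Max = (A < B) ? B : A;
      // (vselect (setcc $a, $b, SETLT), $a, $b) -> min($a, $b)
      int32_t Min = (A < B) ? A : B;
      printf("%d %d\n", Max, Min); // 7 -5

      // (xor (or $a, $b), all-ones) -> nor($a, $b), i.e. MipsISD::VNOR
      uint32_t X = 0x0000FFFF, Y = 0x00FF00FF;
      uint32_t Xor = (X | Y) ^ 0xFFFFFFFFu;
      uint32_t Nor = ~(X | Y);
      printf("0x%08X 0x%08X\n", Xor, Nor); // identical
      return 0;
    }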
@@ -496,6 +971,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
case ISD::ADDE:
return performADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::AND:
+ Val = performANDCombine(N, DAG, DCI, Subtarget);
+ break;
+ case ISD::OR:
+ Val = performORCombine(N, DAG, DCI, Subtarget);
+ break;
case ISD::SUBE:
return performSUBECombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
@@ -508,14 +989,22 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
return performSRLCombine(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
return performVSELECTCombine(N, DAG);
- case ISD::SETCC: {
+ case ISD::XOR:
+ Val = performXORCombine(N, DAG, Subtarget);
+ break;
+ case ISD::SETCC:
Val = performSETCCCombine(N, DAG);
break;
}
- }
- if (Val.getNode())
+ if (Val.getNode()) {
+ DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
+ N->printrWithDepth(dbgs(), &DAG);
+ dbgs() << "\n=> \n";
+ Val.getNode()->printrWithDepth(dbgs(), &DAG);
+ dbgs() << "\n");
return Val;
+ }
return MipsTargetLowering::PerformDAGCombine(N, DCI);
}
@@ -528,6 +1017,42 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case Mips::BPOSGE32_PSEUDO:
return emitBPOSGE32(MI, BB);
+ case Mips::SNZ_B_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
+ case Mips::SNZ_H_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
+ case Mips::SNZ_W_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
+ case Mips::SNZ_D_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
+ case Mips::SNZ_V_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
+ case Mips::SZ_B_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
+ case Mips::SZ_H_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
+ case Mips::SZ_W_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
+ case Mips::SZ_D_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
+ case Mips::SZ_V_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
+ case Mips::COPY_FW_PSEUDO:
+ return emitCOPY_FW(MI, BB);
+ case Mips::COPY_FD_PSEUDO:
+ return emitCOPY_FD(MI, BB);
+ case Mips::INSERT_FW_PSEUDO:
+ return emitINSERT_FW(MI, BB);
+ case Mips::INSERT_FD_PSEUDO:
+ return emitINSERT_FD(MI, BB);
+ case Mips::FILL_FW_PSEUDO:
+ return emitFILL_FW(MI, BB);
+ case Mips::FILL_FD_PSEUDO:
+ return emitFILL_FD(MI, BB);
+ case Mips::FEXP2_W_1_PSEUDO:
+ return emitFEXP2_W_1(MI, BB);
+ case Mips::FEXP2_D_1_PSEUDO:
+ return emitFEXP2_D_1(MI, BB);
}
}
@@ -564,6 +1089,68 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
InternalLinkage, CLI, Callee, Chain);
}
+SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode &Nd = *cast<LoadSDNode>(Op);
+
+ if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+ return MipsTargetLowering::lowerLOAD(Op, DAG);
+
+ // Replace a double-precision load with two i32 loads and a BuildPairF64.
+ SDLoc DL(Op);
+ SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+ EVT PtrVT = Ptr.getValueType();
+
+ // i32 load from lower address.
+ SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr,
+ MachinePointerInfo(), Nd.isVolatile(),
+ Nd.isNonTemporal(), Nd.isInvariant(),
+ Nd.getAlignment());
+
+ // i32 load from higher address.
+ Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+ SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
+ MachinePointerInfo(), Nd.isVolatile(),
+ Nd.isNonTemporal(), Nd.isInvariant(),
+ std::min(Nd.getAlignment(), 4U));
+
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+
+ SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+ SDValue Ops[2] = {BP, Hi.getValue(1)};
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
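At the value level, the -mno-ldc1-sdc1 lowering above reassembles the 64-bit bit pattern of the double from two 32-bit words, swapping the pair for big-endian targets. A purely illustrative standalone sketch of that reassembly, assuming a little-endian host (which is why no swap appears here):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      double D = 3.141592653589793;

      // The two i32 "loads": word at the lower address, then the word 4 bytes up.
      uint32_t Words[2];
      memcpy(Words, &D, sizeof(D));

      // Little-endian host: Words[0] is the low half of the bit pattern,
      // Words[1] the high half. The code above swaps Lo and Hi when the
      // target is big-endian before building the pair.
      uint64_t Bits = ((uint64_t)Words[1] << 32) | Words[0];

      double Rebuilt;                          // the BuildPairF64 analogue
      memcpy(&Rebuilt, &Bits, sizeof(Rebuilt));
      printf("%.15f\n", Rebuilt);              // 3.141592653589793
      return 0;
    }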
+SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode &Nd = *cast<StoreSDNode>(Op);
+
+ if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+ return MipsTargetLowering::lowerSTORE(Op, DAG);
+
+ // Replace a double-precision store with two ExtractElementF64 nodes and two i32 stores.
+ SDLoc DL(Op);
+ SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+ EVT PtrVT = Ptr.getValueType();
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Val, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Val, DAG.getConstant(1, MVT::i32));
+
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+
+ // i32 store to lower address.
+ Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
+ Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
+ Nd.getTBAAInfo());
+
+ // i32 store to higher address.
+ Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+ return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
+ Nd.isVolatile(), Nd.isNonTemporal(),
+ std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
+}
+
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
bool HasLo, bool HasHi,
SelectionDAG &DAG) const {
@@ -574,11 +1161,9 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
SDValue Lo, Hi;
if (HasLo)
- Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
- DAG.getConstant(Mips::sub_lo, MVT::i32));
+ Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
if (HasHi)
- Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
- DAG.getConstant(Mips::sub_hi, MVT::i32));
+ Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);
if (!HasLo || !HasHi)
return HasLo ? Lo : Hi;
@@ -593,14 +1178,12 @@ static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) {
DAG.getConstant(0, MVT::i32));
SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
+ return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}
static SDValue extractLOHI(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
- SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
- DAG.getConstant(Mips::sub_lo, MVT::i32));
- SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
- DAG.getConstant(Mips::sub_hi, MVT::i32));
+ SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
+ SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}
@@ -664,8 +1247,156 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
return DAG.getMergeValues(Vals, 2, DL);
}
+// Lower an MSA copy intrinsic into the specified SelectionDAG node
+static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
+ SDLoc DL(Op);
+ SDValue Vec = Op->getOperand(1);
+ SDValue Idx = Op->getOperand(2);
+ EVT ResTy = Op->getValueType(0);
+ EVT EltTy = Vec->getValueType(0).getVectorElementType();
+
+ SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
+ DAG.getValueType(EltTy));
+
+ return Result;
+}
+
+static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
+ EVT ResVecTy = Op->getValueType(0);
+ EVT ViaVecTy = ResVecTy;
+ SDLoc DL(Op);
+
+ // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
+ // LaneB is the lower 32 bits. Otherwise LaneA and LaneB are alternating
+ // lanes.
+ SDValue LaneA;
+ SDValue LaneB = Op->getOperand(2);
+
+ if (ResVecTy == MVT::v2i64) {
+ LaneA = DAG.getConstant(0, MVT::i32);
+ ViaVecTy = MVT::v4i32;
+ } else
+ LaneA = LaneB;
+
+ SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
+ LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
+
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
+ ViaVecTy.getVectorNumElements());
+
+ if (ViaVecTy != ResVecTy)
+ Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);
+
+ return Result;
+}
+
+static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
+ return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0));
+}
+
+static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
+ bool BigEndian, SelectionDAG &DAG) {
+ EVT ViaVecTy = VecTy;
+ SDValue SplatValueA = SplatValue;
+ SDValue SplatValueB = SplatValue;
+ SDLoc DL(SplatValue);
+
+ if (VecTy == MVT::v2i64) {
+ // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
+ ViaVecTy = MVT::v4i32;
+
+ SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
+ SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
+ DAG.getConstant(32, MVT::i32));
+ SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
+ }
+
+ // We currently hold the parts in little endian order. Swap them if
+ // necessary.
+ if (BigEndian)
+ std::swap(SplatValueA, SplatValueB);
+
+ SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB };
+
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
+ ViaVecTy.getVectorNumElements());
+
+ if (VecTy != ViaVecTy)
+ Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
+
+ return Result;
+}
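
For the v2i64 case above, the splat value is cut into two 32-bit halves (SplatValueA/SplatValueB), swapped on big-endian targets, and replicated as an {A, B} word pattern for the v4i32 BUILD_VECTOR. A minimal standalone sketch of that splitting, for illustration only (not patch code; splatWords64 is a made-up name):

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>

static std::array<uint32_t, 4> splatWords64(uint64_t Value, bool BigEndian) {
  uint32_t A = static_cast<uint32_t>(Value);        // low half (SplatValueA)
  uint32_t B = static_cast<uint32_t>(Value >> 32);  // high half (SplatValueB)
  if (BigEndian)
    std::swap(A, B);   // parts are held in little-endian order first
  return {A, B, A, B}; // repeating {A, B} word pattern for the via-v4i32 vector
}

int main() {
  for (uint32_t W : splatWords64(0x1122334455667788ULL, /*BigEndian=*/false))
    std::printf("0x%08x ", W); // 0x55667788 0x11223344 0x55667788 0x11223344
  std::printf("\n");
  return 0;
}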
+
+static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
+ unsigned Opc, SDValue Imm,
+ bool BigEndian) {
+ EVT VecTy = Op->getValueType(0);
+ SDValue Exp2Imm;
+ SDLoc DL(Op);
+
+ // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
+ // here for now.
+ if (VecTy == MVT::v2i64) {
+ if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
+ APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
+
+ SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32);
+ SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);
+
+ if (BigEndian)
+ std::swap(BitImmLoOp, BitImmHiOp);
+
+ Exp2Imm =
+ DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, BitImmLoOp,
+ BitImmHiOp, BitImmLoOp, BitImmHiOp));
+ }
+ }
+
+ if (Exp2Imm.getNode() == NULL) {
+ // We couldn't constant fold, so do a vector shift instead.
+
+ // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
+ // only values 0-63 are valid.
+ if (VecTy == MVT::v2i64)
+ Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);
+
+ Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
+
+ Exp2Imm =
+ DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm);
+ }
+
+ return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
+}
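
The value combined with operand 1 by these bit-immediate intrinsics is 2^imm per element: either constant-folded (the v2i64 APInt path above) or rebuilt in the DAG as splat(1) << splat(imm). A scalar sketch of that value, for illustration only (exp2Imm is a made-up helper, not patch code):

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t exp2Imm(uint64_t Imm) {
  assert(Imm < 64 && "only shift amounts 0-63 are meaningful");
  return uint64_t(1) << Imm; // same value as APInt(64, 1) << CImm above
}

int main() {
  // The non-constant fallback builds the same thing as splat(1) << splat(Imm).
  std::printf("0x%016llx\n", (unsigned long long)exp2Imm(35)); // 0x0000000800000000
  return 0;
}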
+
+static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
+ EVT ResTy = Op->getValueType(0);
+ SDLoc DL(Op);
+ SDValue One = DAG.getConstant(1, ResTy);
+ SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
+
+ return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
+ DAG.getNOT(DL, Bit, ResTy));
+}
+
+static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
+ << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
+ SDValue BitMask = DAG.getConstant(~BitImm, ResTy);
+
+ return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
+}
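
The bclr/bclri lowering above is, per element, an AND with the inverted single-bit mask. A scalar model of that operation (a sketch for illustration, not patch code):

#include <cstdint>
#include <cstdio>

static uint32_t bitClear(uint32_t Val, unsigned Bit) {
  return Val & ~(uint32_t(1) << Bit); // AND with NOT(1 << bit)
}

int main() {
  std::printf("0x%08x\n", bitClear(0xffffffffu, 4)); // 0xffffffef
  return 0;
}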
+
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
default:
return SDValue();
@@ -701,12 +1432,610 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerDSPIntr(Op, DAG, MipsISD::MSub);
case Intrinsic::mips_msubu:
return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
+ case Intrinsic::mips_addv_b:
+ case Intrinsic::mips_addv_h:
+ case Intrinsic::mips_addv_w:
+ case Intrinsic::mips_addv_d:
+ return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_addvi_b:
+ case Intrinsic::mips_addvi_h:
+ case Intrinsic::mips_addvi_w:
+ case Intrinsic::mips_addvi_d:
+ return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_and_v:
+ return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_andi_b:
+ return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_bclr_b:
+ case Intrinsic::mips_bclr_h:
+ case Intrinsic::mips_bclr_w:
+ case Intrinsic::mips_bclr_d:
+ return lowerMSABitClear(Op, DAG);
+ case Intrinsic::mips_bclri_b:
+ case Intrinsic::mips_bclri_h:
+ case Intrinsic::mips_bclri_w:
+ case Intrinsic::mips_bclri_d:
+ return lowerMSABitClearImm(Op, DAG);
+ case Intrinsic::mips_binsli_b:
+ case Intrinsic::mips_binsli_h:
+ case Intrinsic::mips_binsli_w:
+ case Intrinsic::mips_binsli_d: {
+ EVT VecTy = Op->getValueType(0);
+ EVT EltTy = VecTy.getVectorElementType();
+ APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
+ Op->getConstantOperandVal(3));
+ return DAG.getNode(ISD::VSELECT, DL, VecTy,
+ DAG.getConstant(Mask, VecTy, true), Op->getOperand(1),
+ Op->getOperand(2));
+ }
+ case Intrinsic::mips_binsri_b:
+ case Intrinsic::mips_binsri_h:
+ case Intrinsic::mips_binsri_w:
+ case Intrinsic::mips_binsri_d: {
+ EVT VecTy = Op->getValueType(0);
+ EVT EltTy = VecTy.getVectorElementType();
+ APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
+ Op->getConstantOperandVal(3));
+ return DAG.getNode(ISD::VSELECT, DL, VecTy,
+ DAG.getConstant(Mask, VecTy, true), Op->getOperand(1),
+ Op->getOperand(2));
+ }
+ case Intrinsic::mips_bmnz_v:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
+ Op->getOperand(2), Op->getOperand(1));
+ case Intrinsic::mips_bmnzi_b:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
+ Op->getOperand(1));
+ case Intrinsic::mips_bmz_v:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_bmzi_b:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_bneg_b:
+ case Intrinsic::mips_bneg_h:
+ case Intrinsic::mips_bneg_w:
+ case Intrinsic::mips_bneg_d: {
+ EVT VecTy = Op->getValueType(0);
+ SDValue One = DAG.getConstant(1, VecTy);
+
+ return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
+ DAG.getNode(ISD::SHL, DL, VecTy, One,
+ Op->getOperand(2)));
+ }
+ case Intrinsic::mips_bnegi_b:
+ case Intrinsic::mips_bnegi_h:
+ case Intrinsic::mips_bnegi_w:
+ case Intrinsic::mips_bnegi_d:
+ return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
+ !Subtarget->isLittle());
+ case Intrinsic::mips_bnz_b:
+ case Intrinsic::mips_bnz_h:
+ case Intrinsic::mips_bnz_w:
+ case Intrinsic::mips_bnz_d:
+ return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_bnz_v:
+ return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_bsel_v:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2),
+ Op->getOperand(3));
+ case Intrinsic::mips_bseli_b:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2),
+ lowerMSASplatImm(Op, 3, DAG));
+ case Intrinsic::mips_bset_b:
+ case Intrinsic::mips_bset_h:
+ case Intrinsic::mips_bset_w:
+ case Intrinsic::mips_bset_d: {
+ EVT VecTy = Op->getValueType(0);
+ SDValue One = DAG.getConstant(1, VecTy);
+
+ return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
+ DAG.getNode(ISD::SHL, DL, VecTy, One,
+ Op->getOperand(2)));
+ }
+ case Intrinsic::mips_bseti_b:
+ case Intrinsic::mips_bseti_h:
+ case Intrinsic::mips_bseti_w:
+ case Intrinsic::mips_bseti_d:
+ return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
+ !Subtarget->isLittle());
+ case Intrinsic::mips_bz_b:
+ case Intrinsic::mips_bz_h:
+ case Intrinsic::mips_bz_w:
+ case Intrinsic::mips_bz_d:
+ return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_bz_v:
+ return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_ceq_b:
+ case Intrinsic::mips_ceq_h:
+ case Intrinsic::mips_ceq_w:
+ case Intrinsic::mips_ceq_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETEQ);
+ case Intrinsic::mips_ceqi_b:
+ case Intrinsic::mips_ceqi_h:
+ case Intrinsic::mips_ceqi_w:
+ case Intrinsic::mips_ceqi_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ);
+ case Intrinsic::mips_cle_s_b:
+ case Intrinsic::mips_cle_s_h:
+ case Intrinsic::mips_cle_s_w:
+ case Intrinsic::mips_cle_s_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETLE);
+ case Intrinsic::mips_clei_s_b:
+ case Intrinsic::mips_clei_s_h:
+ case Intrinsic::mips_clei_s_w:
+ case Intrinsic::mips_clei_s_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETLE);
+ case Intrinsic::mips_cle_u_b:
+ case Intrinsic::mips_cle_u_h:
+ case Intrinsic::mips_cle_u_w:
+ case Intrinsic::mips_cle_u_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETULE);
+ case Intrinsic::mips_clei_u_b:
+ case Intrinsic::mips_clei_u_h:
+ case Intrinsic::mips_clei_u_w:
+ case Intrinsic::mips_clei_u_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
+ case Intrinsic::mips_clt_s_b:
+ case Intrinsic::mips_clt_s_h:
+ case Intrinsic::mips_clt_s_w:
+ case Intrinsic::mips_clt_s_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETLT);
+ case Intrinsic::mips_clti_s_b:
+ case Intrinsic::mips_clti_s_h:
+ case Intrinsic::mips_clti_s_w:
+ case Intrinsic::mips_clti_s_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETLT);
+ case Intrinsic::mips_clt_u_b:
+ case Intrinsic::mips_clt_u_h:
+ case Intrinsic::mips_clt_u_w:
+ case Intrinsic::mips_clt_u_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETULT);
+ case Intrinsic::mips_clti_u_b:
+ case Intrinsic::mips_clti_u_h:
+ case Intrinsic::mips_clti_u_w:
+ case Intrinsic::mips_clti_u_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
+ case Intrinsic::mips_copy_s_b:
+ case Intrinsic::mips_copy_s_h:
+ case Intrinsic::mips_copy_s_w:
+ return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
+ case Intrinsic::mips_copy_s_d:
+ // Don't lower directly into VEXTRACT_SEXT_ELT since i64 might be illegal.
+ // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type
+ // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_copy_u_b:
+ case Intrinsic::mips_copy_u_h:
+ case Intrinsic::mips_copy_u_w:
+ return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
+ case Intrinsic::mips_copy_u_d:
+ // Don't lower directly into VEXTRACT_ZEXT_ELT since i64 might be illegal.
+ // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type
+ // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
+ //
+ // Note: When i64 is illegal, this results in copy_s.w instructions instead
+ // of copy_u.w instructions. This makes no difference to the behaviour
+ // since i64 is only illegal when the register file is 32-bit.
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_div_s_b:
+ case Intrinsic::mips_div_s_h:
+ case Intrinsic::mips_div_s_w:
+ case Intrinsic::mips_div_s_d:
+ return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_div_u_b:
+ case Intrinsic::mips_div_u_h:
+ case Intrinsic::mips_div_u_w:
+ case Intrinsic::mips_div_u_d:
+ return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_fadd_w:
+ case Intrinsic::mips_fadd_d:
+ return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
+ case Intrinsic::mips_fceq_w:
+ case Intrinsic::mips_fceq_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETOEQ);
+ case Intrinsic::mips_fcle_w:
+ case Intrinsic::mips_fcle_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETOLE);
+ case Intrinsic::mips_fclt_w:
+ case Intrinsic::mips_fclt_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETOLT);
+ case Intrinsic::mips_fcne_w:
+ case Intrinsic::mips_fcne_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETONE);
+ case Intrinsic::mips_fcor_w:
+ case Intrinsic::mips_fcor_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETO);
+ case Intrinsic::mips_fcueq_w:
+ case Intrinsic::mips_fcueq_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETUEQ);
+ case Intrinsic::mips_fcule_w:
+ case Intrinsic::mips_fcule_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETULE);
+ case Intrinsic::mips_fcult_w:
+ case Intrinsic::mips_fcult_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETULT);
+ case Intrinsic::mips_fcun_w:
+ case Intrinsic::mips_fcun_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETUO);
+ case Intrinsic::mips_fcune_w:
+ case Intrinsic::mips_fcune_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETUNE);
+ case Intrinsic::mips_fdiv_w:
+ case Intrinsic::mips_fdiv_d:
+ return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_ffint_u_w:
+ case Intrinsic::mips_ffint_u_d:
+ return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_ffint_s_w:
+ case Intrinsic::mips_ffint_s_d:
+ return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_fill_b:
+ case Intrinsic::mips_fill_h:
+ case Intrinsic::mips_fill_w:
+ case Intrinsic::mips_fill_d: {
+ SmallVector<SDValue, 16> Ops;
+ EVT ResTy = Op->getValueType(0);
+
+ for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i)
+ Ops.push_back(Op->getOperand(1));
+
+ // If ResTy is v2i64 then the type legalizer will break this node down into
+ // an equivalent v4i32.
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size());
+ }
+ case Intrinsic::mips_fexp2_w:
+ case Intrinsic::mips_fexp2_d: {
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(
+ ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
+ }
+ case Intrinsic::mips_flog2_w:
+ case Intrinsic::mips_flog2_d:
+ return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_fmadd_w:
+ case Intrinsic::mips_fmadd_d:
+ return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
+ case Intrinsic::mips_fmul_w:
+ case Intrinsic::mips_fmul_d:
+ return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_fmsub_w:
+ case Intrinsic::mips_fmsub_d: {
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy,
+ Op->getOperand(2), Op->getOperand(3)));
+ }
+ case Intrinsic::mips_frint_w:
+ case Intrinsic::mips_frint_d:
+ return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_fsqrt_w:
+ case Intrinsic::mips_fsqrt_d:
+ return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_fsub_w:
+ case Intrinsic::mips_fsub_d:
+ return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_ftrunc_u_w:
+ case Intrinsic::mips_ftrunc_u_d:
+ return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_ftrunc_s_w:
+ case Intrinsic::mips_ftrunc_s_d:
+ return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_ilvev_b:
+ case Intrinsic::mips_ilvev_h:
+ case Intrinsic::mips_ilvev_w:
+ case Intrinsic::mips_ilvev_d:
+ return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_ilvl_b:
+ case Intrinsic::mips_ilvl_h:
+ case Intrinsic::mips_ilvl_w:
+ case Intrinsic::mips_ilvl_d:
+ return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_ilvod_b:
+ case Intrinsic::mips_ilvod_h:
+ case Intrinsic::mips_ilvod_w:
+ case Intrinsic::mips_ilvod_d:
+ return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_ilvr_b:
+ case Intrinsic::mips_ilvr_h:
+ case Intrinsic::mips_ilvr_w:
+ case Intrinsic::mips_ilvr_d:
+ return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_insert_b:
+ case Intrinsic::mips_insert_h:
+ case Intrinsic::mips_insert_w:
+ case Intrinsic::mips_insert_d:
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
+ case Intrinsic::mips_ldi_b:
+ case Intrinsic::mips_ldi_h:
+ case Intrinsic::mips_ldi_w:
+ case Intrinsic::mips_ldi_d:
+ return lowerMSASplatImm(Op, 1, DAG);
+ case Intrinsic::mips_lsa: {
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
+ Op->getOperand(2), Op->getOperand(3)));
+ }
+ case Intrinsic::mips_maddv_b:
+ case Intrinsic::mips_maddv_h:
+ case Intrinsic::mips_maddv_w:
+ case Intrinsic::mips_maddv_d: {
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
+ Op->getOperand(2), Op->getOperand(3)));
+ }
+ case Intrinsic::mips_max_s_b:
+ case Intrinsic::mips_max_s_h:
+ case Intrinsic::mips_max_s_w:
+ case Intrinsic::mips_max_s_d:
+ return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_max_u_b:
+ case Intrinsic::mips_max_u_h:
+ case Intrinsic::mips_max_u_w:
+ case Intrinsic::mips_max_u_d:
+ return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_maxi_s_b:
+ case Intrinsic::mips_maxi_s_h:
+ case Intrinsic::mips_maxi_s_w:
+ case Intrinsic::mips_maxi_s_d:
+ return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_maxi_u_b:
+ case Intrinsic::mips_maxi_u_h:
+ case Intrinsic::mips_maxi_u_w:
+ case Intrinsic::mips_maxi_u_d:
+ return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_min_s_b:
+ case Intrinsic::mips_min_s_h:
+ case Intrinsic::mips_min_s_w:
+ case Intrinsic::mips_min_s_d:
+ return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_min_u_b:
+ case Intrinsic::mips_min_u_h:
+ case Intrinsic::mips_min_u_w:
+ case Intrinsic::mips_min_u_d:
+ return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_mini_s_b:
+ case Intrinsic::mips_mini_s_h:
+ case Intrinsic::mips_mini_s_w:
+ case Intrinsic::mips_mini_s_d:
+ return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_mini_u_b:
+ case Intrinsic::mips_mini_u_h:
+ case Intrinsic::mips_mini_u_w:
+ case Intrinsic::mips_mini_u_d:
+ return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_mod_s_b:
+ case Intrinsic::mips_mod_s_h:
+ case Intrinsic::mips_mod_s_w:
+ case Intrinsic::mips_mod_s_d:
+ return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_mod_u_b:
+ case Intrinsic::mips_mod_u_h:
+ case Intrinsic::mips_mod_u_w:
+ case Intrinsic::mips_mod_u_d:
+ return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_mulv_b:
+ case Intrinsic::mips_mulv_h:
+ case Intrinsic::mips_mulv_w:
+ case Intrinsic::mips_mulv_d:
+ return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_msubv_b:
+ case Intrinsic::mips_msubv_h:
+ case Intrinsic::mips_msubv_w:
+ case Intrinsic::mips_msubv_d: {
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
+ Op->getOperand(2), Op->getOperand(3)));
+ }
+ case Intrinsic::mips_nlzc_b:
+ case Intrinsic::mips_nlzc_h:
+ case Intrinsic::mips_nlzc_w:
+ case Intrinsic::mips_nlzc_d:
+ return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_nor_v: {
+ SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ return DAG.getNOT(DL, Res, Res->getValueType(0));
+ }
+ case Intrinsic::mips_nori_b: {
+ SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
+ Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG));
+ return DAG.getNOT(DL, Res, Res->getValueType(0));
+ }
+ case Intrinsic::mips_or_v:
+ return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_ori_b:
+ return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_pckev_b:
+ case Intrinsic::mips_pckev_h:
+ case Intrinsic::mips_pckev_w:
+ case Intrinsic::mips_pckev_d:
+ return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_pckod_b:
+ case Intrinsic::mips_pckod_h:
+ case Intrinsic::mips_pckod_w:
+ case Intrinsic::mips_pckod_d:
+ return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_pcnt_b:
+ case Intrinsic::mips_pcnt_h:
+ case Intrinsic::mips_pcnt_w:
+ case Intrinsic::mips_pcnt_d:
+ return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_shf_b:
+ case Intrinsic::mips_shf_h:
+ case Intrinsic::mips_shf_w:
+ return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
+ Op->getOperand(2), Op->getOperand(1));
+ case Intrinsic::mips_sll_b:
+ case Intrinsic::mips_sll_h:
+ case Intrinsic::mips_sll_w:
+ case Intrinsic::mips_sll_d:
+ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_slli_b:
+ case Intrinsic::mips_slli_h:
+ case Intrinsic::mips_slli_w:
+ case Intrinsic::mips_slli_d:
+ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_splat_b:
+ case Intrinsic::mips_splat_h:
+ case Intrinsic::mips_splat_w:
+ case Intrinsic::mips_splat_d:
+ // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
+ // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
+ // EXTRACT_VECTOR_ELT can't extract i64s on MIPS32.
+ // Instead we lower to MipsISD::VSHF and match from there.
+ return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+ lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
+ Op->getOperand(1));
+ case Intrinsic::mips_splati_b:
+ case Intrinsic::mips_splati_h:
+ case Intrinsic::mips_splati_w:
+ case Intrinsic::mips_splati_d:
+ return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
+ Op->getOperand(1));
+ case Intrinsic::mips_sra_b:
+ case Intrinsic::mips_sra_h:
+ case Intrinsic::mips_sra_w:
+ case Intrinsic::mips_sra_d:
+ return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_srai_b:
+ case Intrinsic::mips_srai_h:
+ case Intrinsic::mips_srai_w:
+ case Intrinsic::mips_srai_d:
+ return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_srl_b:
+ case Intrinsic::mips_srl_h:
+ case Intrinsic::mips_srl_w:
+ case Intrinsic::mips_srl_d:
+ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_srli_b:
+ case Intrinsic::mips_srli_h:
+ case Intrinsic::mips_srli_w:
+ case Intrinsic::mips_srli_d:
+ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_subv_b:
+ case Intrinsic::mips_subv_h:
+ case Intrinsic::mips_subv_w:
+ case Intrinsic::mips_subv_d:
+ return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_subvi_b:
+ case Intrinsic::mips_subvi_h:
+ case Intrinsic::mips_subvi_w:
+ case Intrinsic::mips_subvi_d:
+ return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_vshf_b:
+ case Intrinsic::mips_vshf_h:
+ case Intrinsic::mips_vshf_w:
+ case Intrinsic::mips_vshf_d:
+ return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
+ case Intrinsic::mips_xor_v:
+ return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_xori_b:
+ return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
}
}
+static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
+ SDLoc DL(Op);
+ SDValue ChainIn = Op->getOperand(0);
+ SDValue Address = Op->getOperand(2);
+ SDValue Offset = Op->getOperand(3);
+ EVT ResTy = Op->getValueType(0);
+ EVT PtrTy = Address->getValueType(0);
+
+ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
+
+ return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false,
+ false, false, 16);
+}
+
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
- switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
+ unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+ switch (Intr) {
default:
return SDValue();
case Intrinsic::mips_extp:
@@ -749,7 +2078,522 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
case Intrinsic::mips_dpsqx_sa_w_ph:
return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
+ case Intrinsic::mips_ld_b:
+ case Intrinsic::mips_ld_h:
+ case Intrinsic::mips_ld_w:
+ case Intrinsic::mips_ld_d:
+ return lowerMSALoadIntr(Op, DAG, Intr);
+ }
+}
+
+static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
+ SDLoc DL(Op);
+ SDValue ChainIn = Op->getOperand(0);
+ SDValue Value = Op->getOperand(2);
+ SDValue Address = Op->getOperand(3);
+ SDValue Offset = Op->getOperand(4);
+ EVT PtrTy = Address->getValueType(0);
+
+ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
+
+ return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false,
+ false, 16);
+}
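
Per the two helpers above, the ld.[bhwd] and st.[bhwd] intrinsics become a plain 16-byte load or store at (pointer + byte offset); the DAG nodes additionally carry 16-byte alignment. Roughly, in scalar terms (a sketch only, not patch code; names are made up):

#include <cstddef>
#include <cstdint>
#include <cstring>

struct V128 { uint8_t Bytes[16]; };

// ld.[bhwd]: one 16-byte load from (base + byte offset).
static V128 msaLoad(const void *Base, std::ptrdiff_t Offset) {
  V128 V;
  std::memcpy(&V, static_cast<const uint8_t *>(Base) + Offset, sizeof(V));
  return V;
}

// st.[bhwd]: one 16-byte store to (base + byte offset).
static void msaStore(const V128 &V, void *Base, std::ptrdiff_t Offset) {
  std::memcpy(static_cast<uint8_t *>(Base) + Offset, &V, sizeof(V));
}

int main() {
  uint8_t Buf[32] = {};
  V128 V = msaLoad(Buf, 16);
  msaStore(V, Buf, 0);
  return 0;
}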
+
+SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+ switch (Intr) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_st_b:
+ case Intrinsic::mips_st_h:
+ case Intrinsic::mips_st_w:
+ case Intrinsic::mips_st_d:
+ return lowerMSAStoreIntr(Op, DAG, Intr);
+ }
+}
+
+/// \brief Check if the given BuildVectorSDNode is a splat.
+/// This method currently relies on DAG nodes being reused when equivalent,
+/// so it's possible for this to return false even when isConstantSplat returns
+/// true.
+static bool isSplatVector(const BuildVectorSDNode *N) {
+ unsigned int nOps = N->getNumOperands();
+ assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector");
+
+ SDValue Operand0 = N->getOperand(0);
+
+ for (unsigned int i = 1; i < nOps; ++i) {
+ if (N->getOperand(i) != Operand0)
+ return false;
+ }
+
+ return true;
+}
+
+// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
+//
+// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
+// choose to sign-extend but we could have equally chosen zero-extend. The
+// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
+// result into this node later (possibly changing it to a zero-extend in the
+// process).
+SDValue MipsSETargetLowering::
+lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDValue Op0 = Op->getOperand(0);
+ EVT VecTy = Op0->getValueType(0);
+
+ if (!VecTy.is128BitVector())
+ return SDValue();
+
+ if (ResTy.isInteger()) {
+ SDValue Op1 = Op->getOperand(1);
+ EVT EltTy = VecTy.getVectorElementType();
+ return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
+ DAG.getValueType(EltTy));
+ }
+
+ return Op;
+}
+
+static bool isConstantOrUndef(const SDValue Op) {
+ if (Op->getOpcode() == ISD::UNDEF)
+ return true;
+ if (dyn_cast<ConstantSDNode>(Op))
+ return true;
+ if (dyn_cast<ConstantFPSDNode>(Op))
+ return true;
+ return false;
+}
+
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+ for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+ if (isConstantOrUndef(Op->getOperand(i)))
+ return true;
+ return false;
+}
+
+// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
+// backend.
+//
+// Lowers according to the following rules:
+// - Constant splats are legal as-is as long as the SplatBitSize is a power of
+// 2 less than or equal to 64 and the value fits into a signed 10-bit
+// immediate
+// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
+// is a power of 2 less than or equal to 64 and the value does not fit into a
+// signed 10-bit immediate
+// - Non-constant splats are legal as-is.
+// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
+// - All others are illegal and must be expanded.
+SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDLoc DL(Op);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
+ return SDValue();
+
+ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8,
+ !Subtarget->isLittle()) && SplatBitSize <= 64) {
+ // We can only cope with 8, 16, 32, or 64-bit elements
+ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
+ SplatBitSize != 64)
+ return SDValue();
+
+ // If the value fits into a simm10 then we can use ldi.[bhwd]
+ // However, if it isn't an integer type we will have to bitcast from an
+ // integer type first. Also, if there are any undefs, we must lower them
+ // to defined values first.
+ if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10))
+ return Op;
+
+ EVT ViaVecTy;
+
+ switch (SplatBitSize) {
+ default:
+ return SDValue();
+ case 8:
+ ViaVecTy = MVT::v16i8;
+ break;
+ case 16:
+ ViaVecTy = MVT::v8i16;
+ break;
+ case 32:
+ ViaVecTy = MVT::v4i32;
+ break;
+ case 64:
+ // There's no fill.d to fall back on for 64-bit values
+ return SDValue();
+ }
+
+ // SelectionDAG::getConstant will promote SplatValue appropriately.
+ SDValue Result = DAG.getConstant(SplatValue, ViaVecTy);
+
+ // Bitcast to the type we originally wanted
+ if (ViaVecTy != ResTy)
+ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
+
+ return Result;
+ } else if (isSplatVector(Node))
+ return Op;
+ else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+ // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+ // The resulting code is the same length as the expansion, but it doesn't
+ // use memory operations.
+ EVT ResTy = Node->getValueType(0);
+
+ assert(ResTy.isVector());
+
+ unsigned NumElts = ResTy.getVectorNumElements();
+ SDValue Vector = DAG.getUNDEF(ResTy);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+ Node->getOperand(i),
+ DAG.getConstant(i, MVT::i32));
+ }
+ return Vector;
+ }
+
+ return SDValue();
+}
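
The "fits into a signed 10-bit immediate" test above decides whether a constant splat can remain legal for ldi.[bhwd]. A tiny sketch of that range check, for illustration only (fitsSimm10 is a made-up name mirroring APInt::isSignedIntN(10)):

#include <cstdint>
#include <cstdio>

static bool fitsSimm10(int64_t V) {
  return V >= -512 && V <= 511; // signed 10-bit range
}

int main() {
  std::printf("%d %d %d\n", fitsSimm10(511), fitsSimm10(-512), fitsSimm10(512)); // 1 1 0
  return 0;
}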
+
+// Lower VECTOR_SHUFFLE into SHF (if possible).
+//
+// SHF splits the vector into blocks of four elements, then shuffles these
+// elements according to a <4 x i2> constant (encoded as an integer immediate).
+//
+// It is therefore possible to lower into SHF when the mask takes the form:
+// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
+// When undefs appear they are treated as if they were whatever value is
+// necessary in order to fit the above form.
+//
+// For example:
+// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
+// i32 7, i32 6, i32 5, i32 4>
+// is lowered to:
+// (SHF_H $w0, $w1, 27)
+// where the 27 comes from:
+// 3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ int SHFIndices[4] = { -1, -1, -1, -1 };
+
+ if (Indices.size() < 4)
+ return SDValue();
+
+ for (unsigned i = 0; i < 4; ++i) {
+ for (unsigned j = i; j < Indices.size(); j += 4) {
+ int Idx = Indices[j];
+
+ // Convert from vector index to 4-element subvector index
+ // If an index refers to an element outside of the subvector then give up
+ if (Idx != -1) {
+ Idx -= 4 * (j / 4);
+ if (Idx < 0 || Idx >= 4)
+ return SDValue();
+ }
+
+ // If the mask has an undef, replace it with the current index.
+ // Note that it might still be undef if the current index is also undef
+ if (SHFIndices[i] == -1)
+ SHFIndices[i] = Idx;
+
+ // Check that non-undef values are the same as in the mask. If they
+ // aren't then give up
+ if (!(Idx == -1 || Idx == SHFIndices[i]))
+ return SDValue();
+ }
+ }
+
+ // Calculate the immediate. Replace any remaining undefs with zero
+ APInt Imm(32, 0);
+ for (int i = 3; i >= 0; --i) {
+ int Idx = SHFIndices[i];
+
+ if (Idx == -1)
+ Idx = 0;
+
+ Imm <<= 2;
+ Imm |= Idx & 0x3;
+ }
+
+ return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy,
+ DAG.getConstant(Imm, MVT::i32), Op->getOperand(0));
+}
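
The immediate-packing loop above can be modelled directly in scalar code; it reproduces the <3, 2, 1, 0> -> 27 example from the comment (a sketch for illustration, not patch code):

#include <cstdio>

static unsigned shfImmediate(const int Idx[4]) {
  unsigned Imm = 0;
  for (int i = 3; i >= 0; --i) {
    int I = Idx[i] < 0 ? 0 : Idx[i]; // any remaining undef becomes 0
    Imm = (Imm << 2) | (I & 0x3);    // two bits per element position
  }
  return Imm;
}

int main() {
  const int Mask[4] = {3, 2, 1, 0};
  std::printf("%u\n", shfImmediate(Mask)); // 27, as in the example above
  return 0;
}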
+
+// Lower VECTOR_SHUFFLE into ILVEV (if possible).
+//
+// ILVEV interleaves the even elements from each vector.
+//
+// It is possible to lower into ILVEV when the mask takes the form:
+// <0, n, 2, n+2, 4, n+4, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ assert ((Indices.size() % 2) == 0);
+ int WsIdx = 0;
+ int WtIdx = ResTy.getVectorNumElements();
+
+ for (unsigned i = 0; i < Indices.size(); i += 2) {
+ if (Indices[i] != -1 && Indices[i] != WsIdx)
+ return SDValue();
+ if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+ return SDValue();
+ WsIdx += 2;
+ WtIdx += 2;
+ }
+
+ return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
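
The ILVEV match above walks the index pairs expecting <0, n, 2, n+2, ...>, with -1 (undef) matching anything. A standalone sketch of that check (matchesILVEV is a made-up name; not patch code):

#include <cstddef>
#include <cstdio>
#include <vector>

static bool matchesILVEV(const std::vector<int> &Indices, int NumElts) {
  int Ws = 0, Wt = NumElts; // even lanes of the first and second operand
  for (std::size_t i = 0; i + 1 < Indices.size(); i += 2, Ws += 2, Wt += 2) {
    if (Indices[i] != -1 && Indices[i] != Ws)
      return false;
    if (Indices[i + 1] != -1 && Indices[i + 1] != Wt)
      return false;
  }
  return true;
}

int main() {
  std::printf("%d\n", matchesILVEV({0, 4, 2, 6}, 4)); // 1 for a v4i32 shuffle
  return 0;
}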
+
+// Lower VECTOR_SHUFFLE into ILVOD (if possible).
+//
+// ILVOD interleaves the odd elements from each vector.
+//
+// It is possible to lower into ILVOD when the mask takes the form:
+// <1, n+1, 3, n+3, 5, n+5, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ assert ((Indices.size() % 2) == 0);
+ int WsIdx = 1;
+ int WtIdx = ResTy.getVectorNumElements() + 1;
+
+ for (unsigned i = 0; i < Indices.size(); i += 2) {
+ if (Indices[i] != -1 && Indices[i] != WsIdx)
+ return SDValue();
+ if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+ return SDValue();
+ WsIdx += 2;
+ WtIdx += 2;
+ }
+
+ return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVL (if possible).
+//
+// ILVL interleaves consecutive elements from the left half of each vector.
+//
+// It is possible to lower into ILVL when the mask takes the form:
+// <0, n, 1, n+1, 2, n+2, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ assert ((Indices.size() % 2) == 0);
+ int WsIdx = 0;
+ int WtIdx = ResTy.getVectorNumElements();
+
+ for (unsigned i = 0; i < Indices.size(); i += 2) {
+ if (Indices[i] != -1 && Indices[i] != WsIdx)
+ return SDValue();
+ if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+ return SDValue();
+ WsIdx ++;
+ WtIdx ++;
}
+
+ return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVR (if possible).
+//
+// ILVR interleaves consecutive elements from the right half of each vector.
+//
+// It is possible to lower into ILVR when the mask takes the form:
+// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+// where n is the number of elements in the vector and x is half n.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ assert ((Indices.size() % 2) == 0);
+ unsigned NumElts = ResTy.getVectorNumElements();
+ int WsIdx = NumElts / 2;
+ int WtIdx = NumElts + NumElts / 2;
+
+ for (unsigned i = 0; i < Indices.size(); i += 2) {
+ if (Indices[i] != -1 && Indices[i] != WsIdx)
+ return SDValue();
+ if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+ return SDValue();
+ WsIdx ++;
+ WtIdx ++;
+ }
+
+ return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into PCKEV (if possible).
+//
+// PCKEV copies the even elements of each vector into the result vector.
+//
+// It is possible to lower into PCKEV when the mask takes the form:
+// <0, 2, 4, ..., n, n+2, n+4, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ assert ((Indices.size() % 2) == 0);
+ int Idx = 0;
+
+ for (unsigned i = 0; i < Indices.size(); ++i) {
+ if (Indices[i] != -1 && Indices[i] != Idx)
+ return SDValue();
+ Idx += 2;
+ }
+
+ return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
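
The PCKEV match above simply expects the i-th index to be 2*i, undefs excepted; PCKOD below is the same with an offset of 1. A sketch of the even-lane check, for illustration only (matchesPCKEV is a made-up name):

#include <cstddef>
#include <cstdio>
#include <vector>

static bool matchesPCKEV(const std::vector<int> &Indices) {
  int Expected = 0; // even lanes: 0, 2, 4, ..., n, n+2, ...
  for (std::size_t i = 0; i < Indices.size(); ++i, Expected += 2)
    if (Indices[i] != -1 && Indices[i] != Expected)
      return false;
  return true;
}

int main() {
  std::printf("%d\n", matchesPCKEV({0, 2, 4, 6})); // 1: even lanes of both operands
  return 0;
}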
+
+// Lower VECTOR_SHUFFLE into PCKOD (if possible).
+//
+// PCKOD copies the odd elements of each vector into the result vector.
+//
+// It is possible to lower into PCKOD when the mask takes the form:
+// <1, 3, 5, ..., n+1, n+3, n+5, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ assert ((Indices.size() % 2) == 0);
+ int Idx = 1;
+
+ for (unsigned i = 0; i < Indices.size(); ++i) {
+ if (Indices[i] != -1 && Indices[i] != Idx)
+ return SDValue();
+ Idx += 2;
+ }
+
+ return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into VSHF.
+//
+// This mostly consists of converting the shuffle indices in Indices into a
+// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
+// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
+// if the type is v8i16 and all the indices are less than 8 then the second
+// operand is unused and can be replaced with anything. We choose to replace it
+// with the used operand since this reduces the number of instructions overall.
+static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ SmallVector<SDValue, 16> Ops;
+ SDValue Op0;
+ SDValue Op1;
+ EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
+ EVT MaskEltTy = MaskVecTy.getVectorElementType();
+ bool Using1stVec = false;
+ bool Using2ndVec = false;
+ SDLoc DL(Op);
+ int ResTyNumElts = ResTy.getVectorNumElements();
+
+ for (int i = 0; i < ResTyNumElts; ++i) {
+ // Idx == -1 means UNDEF
+ int Idx = Indices[i];
+
+ if (0 <= Idx && Idx < ResTyNumElts)
+ Using1stVec = true;
+ if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
+ Using2ndVec = true;
+ }
+
+ for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
+ ++I)
+ Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy));
+
+ SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0],
+ Ops.size());
+
+ if (Using1stVec && Using2ndVec) {
+ Op0 = Op->getOperand(0);
+ Op1 = Op->getOperand(1);
+ } else if (Using1stVec)
+ Op0 = Op1 = Op->getOperand(0);
+ else if (Using2ndVec)
+ Op0 = Op1 = Op->getOperand(1);
+ else
+ llvm_unreachable("shuffle vector mask references neither vector operand?");
+
+ return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op0, Op1);
+}
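
The operand-use scan above decides whether the VSHF needs one or both source vectors, so an unused operand can be replaced by the used one as described. A small sketch of that analysis (operandUse is a made-up name; not patch code):

#include <cstdio>
#include <vector>

static void operandUse(const std::vector<int> &Indices, int NumElts,
                       bool &Uses1st, bool &Uses2nd) {
  Uses1st = Uses2nd = false;
  for (int Idx : Indices) { // -1 means undef and references neither operand
    if (0 <= Idx && Idx < NumElts)
      Uses1st = true;
    else if (NumElts <= Idx && Idx < 2 * NumElts)
      Uses2nd = true;
  }
}

int main() {
  bool A, B;
  operandUse({1, 0, 3, 2}, 4, A, B);
  std::printf("%d %d\n", A, B); // 1 0: only the first operand is needed
  return 0;
}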
+
+// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
+// indices in the shuffle.
+SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
+ EVT ResTy = Op->getValueType(0);
+
+ if (!ResTy.is128BitVector())
+ return SDValue();
+
+ int ResTyNumElts = ResTy.getVectorNumElements();
+ SmallVector<int, 16> Indices;
+
+ for (int i = 0; i < ResTyNumElts; ++i)
+ Indices.push_back(Node->getMaskElt(i));
+
+ SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}
MachineBasicBlock * MipsSETargetLowering::
@@ -814,3 +2658,318 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
MI->eraseFromParent(); // The pseudo instruction is gone now.
return Sink;
}
+
+MachineBasicBlock * MipsSETargetLowering::
+emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned BranchOp) const{
+ // $bb:
+ // vany_nonzero $rd, $ws
+ // =>
+ // $bb:
+ // bnz.b $ws, $tbb
+ // b $fbb
+ // $fbb:
+ // li $rd1, 0
+ // b $sink
+ // $tbb:
+ // li $rd2, 1
+ // $sink:
+ // $rd = phi($rd1, $fbb, $rd2, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bnz.b instruction to $BB.
+ BuildMI(BB, DL, TII->get(BranchOp))
+ .addReg(MI->getOperand(1).getReg())
+ .addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned RD1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned RD2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
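
The pseudo expansion above is a branch-based materialization of a boolean into a GPR, following the diamond shown in the comment. In plain C++ terms (a sketch only, not patch code):

static int materializeBool(bool BranchTaken) {
  int Rd;
  if (BranchTaken) // bnz.b/bz.b branches to $tbb
    Rd = 1;        // $tbb: li $rd2, 1
  else
    Rd = 0;        // $fbb: li $rd1, 0
  return Rd;       // $sink: $rd = phi($rd1, $rd2)
}

int main() { return materializeBool(true) - 1; }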
+
+// Emit the COPY_FW pseudo instruction.
+//
+// copy_fw_pseudo $fd, $ws, n
+// =>
+// copy_u_w $rt, $ws, $n
+// mtc1 $rt, $fd
+//
+// When n is zero, the equivalent operation can be performed with (potentially)
+// zero instructions due to register overlaps. This optimization is never valid
+// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
+MachineBasicBlock * MipsSETargetLowering::
+emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Fd = MI->getOperand(0).getReg();
+ unsigned Ws = MI->getOperand(1).getReg();
+ unsigned Lane = MI->getOperand(2).getImm();
+
+ if (Lane == 0)
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
+ else {
+ unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the COPY_FD pseudo instruction.
+//
+// copy_fd_pseudo $fd, $ws, n
+// =>
+// splati.d $wt, $ws, $n
+// copy $fd, $wt:sub_64
+//
+// When n is zero, the equivalent operation can be performed with (potentially)
+// zero instructions due to register overlaps. This optimization is always
+// valid because FR=1 mode is the only mode supported by MSA.
+MachineBasicBlock * MipsSETargetLowering::
+emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
+ assert(Subtarget->isFP64bit());
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ unsigned Fd = MI->getOperand(0).getReg();
+ unsigned Ws = MI->getOperand(1).getReg();
+ unsigned Lane = MI->getOperand(2).getImm() * 2;
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Lane == 0)
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
+ else {
+ unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the INSERT_FW pseudo instruction.
+//
+// insert_fw_pseudo $wd, $wd_in, $n, $fs
+// =>
+// subreg_to_reg $wt:sub_lo, $fs
+// insve_w $wd[$n], $wd_in, $wt[0]
+MachineBasicBlock *
+MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned Wd_in = MI->getOperand(1).getReg();
+ unsigned Lane = MI->getOperand(2).getImm();
+ unsigned Fs = MI->getOperand(3).getReg();
+ unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
+ .addImm(0)
+ .addReg(Fs)
+ .addImm(Mips::sub_lo);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
+ .addReg(Wd_in)
+ .addImm(Lane)
+ .addReg(Wt);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the INSERT_FD pseudo instruction.
+//
+// insert_fd_pseudo $wd, $fs, n
+// =>
+// subreg_to_reg $wt:sub_64, $fs
+// insve_d $wd[$n], $wd_in, $wt[0]
+MachineBasicBlock *
+MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ assert(Subtarget->isFP64bit());
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned Wd_in = MI->getOperand(1).getReg();
+ unsigned Lane = MI->getOperand(2).getImm();
+ unsigned Fs = MI->getOperand(3).getReg();
+ unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
+ .addImm(0)
+ .addReg(Fs)
+ .addImm(Mips::sub_64);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
+ .addReg(Wd_in)
+ .addImm(Lane)
+ .addReg(Wt);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the FILL_FW pseudo instruction.
+//
+// fill_fw_pseudo $wd, $fs
+// =>
+// implicit_def $wt1
+// insert_subreg $wt2:subreg_lo, $wt1, $fs
+// splati.w $wd, $wt2[0]
+MachineBasicBlock *
+MipsSETargetLowering::emitFILL_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned Fs = MI->getOperand(1).getReg();
+ unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
+ .addReg(Wt1)
+ .addReg(Fs)
+ .addImm(Mips::sub_lo);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the FILL_FD pseudo instruction.
+//
+// fill_fd_pseudo $wd, $fs
+// =>
+// implicit_def $wt1
+// insert_subreg $wt2:subreg_64, $wt1, $fs
+// splati.d $wd, $wt2[0]
+MachineBasicBlock *
+MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ assert(Subtarget->isFP64bit());
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned Fs = MI->getOperand(1).getReg();
+ unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+ unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
+ .addReg(Wt1)
+ .addReg(Fs)
+ .addImm(Mips::sub_64);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the FEXP2_W_1 pseudo instructions.
+//
+// fexp2_w_1_pseudo $wd, $wt
+// =>
+// ldi.w $ws, 1
+// fexp2.w $wd, $ws, $wt
+MachineBasicBlock *
+MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
+ unsigned Ws1 = RegInfo.createVirtualRegister(RC);
+ unsigned Ws2 = RegInfo.createVirtualRegister(RC);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Splat 1.0 into a vector
+ BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);
+
+ // Emit 1.0 * fexp2(Wt)
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI->getOperand(0).getReg())
+ .addReg(Ws2)
+ .addReg(MI->getOperand(1).getReg());
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the FEXP2_D_1 pseudo instructions.
+//
+// fexp2_d_1_pseudo $wd, $wt
+// =>
+// ldi.d $ws, 1
+// fexp2.d $wd, $ws, $wt
+MachineBasicBlock *
+MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
+ unsigned Ws1 = RegInfo.createVirtualRegister(RC);
+ unsigned Ws2 = RegInfo.createVirtualRegister(RC);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Splat 1.0 into a vector
+ BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);
+
+ // Emit 1.0 * fexp2(Wt)
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI->getOperand(0).getReg())
+ .addReg(Ws2)
+ .addReg(MI->getOperand(1).getReg());
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index ec8a5c7..c5210d9 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -22,6 +22,14 @@ namespace llvm {
public:
explicit MipsSETargetLowering(MipsTargetMachine &TM);
+ /// \brief Enable MSA support for the given integer type and Register
+ /// class.
+ void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC);
+ /// \brief Enable MSA support for the given floating-point type and
+ /// Register class.
+ void addMSAFloatType(MVT::SimpleValueType Ty,
+ const TargetRegisterClass *RC);
+
virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -38,8 +46,8 @@ namespace llvm {
virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
if (VT == MVT::Untyped)
- return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
- &Mips::ACRegsRegClass;
+ return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass :
+ &Mips::ACC64RegClass;
return TargetLowering::getRepRegClassFor(VT);
}
@@ -56,14 +64,50 @@ namespace llvm {
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Lower VECTOR_SHUFFLE into one of a number of instructions
+ /// depending on the indices in the shuffle.
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitMSACBranchPseudo(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned BranchOp) const;
+ /// \brief Emit the COPY_FW pseudo instruction
+ MachineBasicBlock *emitCOPY_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the COPY_FD pseudo instruction
+ MachineBasicBlock *emitCOPY_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the INSERT_FW pseudo instruction
+ MachineBasicBlock *emitINSERT_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the INSERT_FD pseudo instruction
+ MachineBasicBlock *emitINSERT_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FILL_FW pseudo instruction
+ MachineBasicBlock *emitFILL_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FILL_FD pseudo instruction
+ MachineBasicBlock *emitFILL_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FEXP2_W_1 pseudo instructions.
+ MachineBasicBlock *emitFEXP2_W_1(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FEXP2_D_1 pseudo instructions.
+ MachineBasicBlock *emitFEXP2_D_1(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
};
}
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 9521043..02931a3 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -24,11 +24,6 @@
using namespace llvm;
-static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
- cl::desc("Expand double precision loads and "
- "stores to their single precision "
- "counterparts."));
-
MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
: MipsInstrInfo(tm,
tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
@@ -49,10 +44,8 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
unsigned Opc = MI->getOpcode();
- if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
- (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
- (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
- (Opc == Mips::LDC164_P8)) {
+ if ((Opc == Mips::LW) || (Opc == Mips::LD) ||
+ (Opc == Mips::LWC1) || (Opc == Mips::LDC1) || (Opc == Mips::LDC164)) {
if ((MI->getOperand(1).isFI()) && // is a stack slot
(MI->getOperand(2).isImm()) && // the imm is zero
(isZeroImm(MI->getOperand(2)))) {
@@ -74,10 +67,8 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
unsigned Opc = MI->getOpcode();
- if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
- (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
- (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
- (Opc == Mips::SDC164_P8)) {
+ if ((Opc == Mips::SW) || (Opc == Mips::SD) ||
+ (Opc == Mips::SWC1) || (Opc == Mips::SDC1) || (Opc == Mips::SDC164)) {
if ((MI->getOperand(1).isFI()) && // is a stack slot
(MI->getOperand(2).isImm()) && // the imm is zero
(isZeroImm(MI->getOperand(2)))) {
@@ -101,32 +92,34 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
Opc = Mips::MFC1;
- else if (Mips::HIRegsRegClass.contains(SrcReg))
+ else if (Mips::HI32RegClass.contains(SrcReg))
Opc = Mips::MFHI, SrcReg = 0;
- else if (Mips::LORegsRegClass.contains(SrcReg))
+ else if (Mips::LO32RegClass.contains(SrcReg))
Opc = Mips::MFLO, SrcReg = 0;
- else if (Mips::HIRegsDSPRegClass.contains(SrcReg))
+ else if (Mips::HI32DSPRegClass.contains(SrcReg))
Opc = Mips::MFHI_DSP;
- else if (Mips::LORegsDSPRegClass.contains(SrcReg))
+ else if (Mips::LO32DSPRegClass.contains(SrcReg))
Opc = Mips::MFLO_DSP;
else if (Mips::DSPCCRegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4)
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
return;
}
+ else if (Mips::MSACtrlRegClass.contains(SrcReg))
+ Opc = Mips::CFCMSA;
}
else if (Mips::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg.
if (Mips::CCRRegClass.contains(DestReg))
Opc = Mips::CTC1;
else if (Mips::FGR32RegClass.contains(DestReg))
Opc = Mips::MTC1;
- else if (Mips::HIRegsRegClass.contains(DestReg))
+ else if (Mips::HI32RegClass.contains(DestReg))
Opc = Mips::MTHI, DestReg = 0;
- else if (Mips::LORegsRegClass.contains(DestReg))
+ else if (Mips::LO32RegClass.contains(DestReg))
Opc = Mips::MTLO, DestReg = 0;
- else if (Mips::HIRegsDSPRegClass.contains(DestReg))
+ else if (Mips::HI32DSPRegClass.contains(DestReg))
Opc = Mips::MTHI_DSP;
- else if (Mips::LORegsDSPRegClass.contains(DestReg))
+ else if (Mips::LO32DSPRegClass.contains(DestReg))
Opc = Mips::MTLO_DSP;
else if (Mips::DSPCCRegClass.contains(DestReg)) {
BuildMI(MBB, I, DL, get(Mips::WRDSP))
@@ -134,6 +127,8 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(DestReg, RegState::ImplicitDefine);
return;
}
+ else if (Mips::MSACtrlRegClass.contains(DestReg))
+ Opc = Mips::CTCMSA;
}
else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
Opc = Mips::FMOV_S;
@@ -144,21 +139,25 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg.
if (Mips::GPR64RegClass.contains(SrcReg))
Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
- else if (Mips::HIRegs64RegClass.contains(SrcReg))
+ else if (Mips::HI64RegClass.contains(SrcReg))
Opc = Mips::MFHI64, SrcReg = 0;
- else if (Mips::LORegs64RegClass.contains(SrcReg))
+ else if (Mips::LO64RegClass.contains(SrcReg))
Opc = Mips::MFLO64, SrcReg = 0;
else if (Mips::FGR64RegClass.contains(SrcReg))
Opc = Mips::DMFC1;
}
else if (Mips::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
- if (Mips::HIRegs64RegClass.contains(DestReg))
+ if (Mips::HI64RegClass.contains(DestReg))
Opc = Mips::MTHI64, DestReg = 0;
- else if (Mips::LORegs64RegClass.contains(DestReg))
+ else if (Mips::LO64RegClass.contains(DestReg))
Opc = Mips::MTLO64, DestReg = 0;
else if (Mips::FGR64RegClass.contains(DestReg))
Opc = Mips::DMTC1;
}
+ else if (Mips::MSA128BRegClass.contains(DestReg)) { // Copy to MSA reg
+ if (Mips::MSA128BRegClass.contains(SrcReg))
+ Opc = Mips::MOVE_V;
+ }
assert(Opc && "Cannot copy registers");
@@ -186,23 +185,31 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc = 0;
if (Mips::GPR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ Opc = Mips::SW;
else if (Mips::GPR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
- else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64;
- else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
- else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
+ Opc = Mips::SD;
+ else if (Mips::ACC64RegClass.hasSubClassEq(RC))
+ Opc = Mips::STORE_ACC64;
+ else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC))
+ Opc = Mips::STORE_ACC64DSP;
+ else if (Mips::ACC128RegClass.hasSubClassEq(RC))
+ Opc = Mips::STORE_ACC128;
else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::STORE_CCOND_DSP_P8 : Mips::STORE_CCOND_DSP;
+ Opc = Mips::STORE_CCOND_DSP;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ Opc = Mips::SWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
Opc = Mips::SDC1;
else if (Mips::FGR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+ Opc = Mips::SDC164;
+ else if (RC->hasType(MVT::v16i8))
+ Opc = Mips::ST_B;
+ else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16))
+ Opc = Mips::ST_H;
+ else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32))
+ Opc = Mips::ST_W;
+ else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
+ Opc = Mips::ST_D;
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
@@ -219,23 +226,31 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc = 0;
if (Mips::GPR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ Opc = Mips::LW;
else if (Mips::GPR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
- else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64;
- else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
- else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
+ Opc = Mips::LD;
+ else if (Mips::ACC64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LOAD_ACC64;
+ else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC))
+ Opc = Mips::LOAD_ACC64DSP;
+ else if (Mips::ACC128RegClass.hasSubClassEq(RC))
+ Opc = Mips::LOAD_ACC128;
else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LOAD_CCOND_DSP_P8 : Mips::LOAD_CCOND_DSP;
+ Opc = Mips::LOAD_CCOND_DSP;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ Opc = Mips::LWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
Opc = Mips::LDC1;
else if (Mips::FGR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+ Opc = Mips::LDC164;
+ else if (RC->hasType(MVT::v16i8))
+ Opc = Mips::LD_B;
+ else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16))
+ Opc = Mips::LD_H;
+ else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32))
+ Opc = Mips::LD_W;
+ else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
+ Opc = Mips::LD_D;
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
@@ -251,6 +266,27 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case Mips::RetRA:
expandRetRA(MBB, MI, Mips::RET);
break;
+ case Mips::PseudoMFHI:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI);
+ break;
+ case Mips::PseudoMFLO:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO);
+ break;
+ case Mips::PseudoMFHI64:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI64);
+ break;
+ case Mips::PseudoMFLO64:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO64);
+ break;
+ case Mips::PseudoMTLOHI:
+ expandPseudoMTLoHi(MBB, MI, Mips::MTLO, Mips::MTHI, false);
+ break;
+ case Mips::PseudoMTLOHI64:
+ expandPseudoMTLoHi(MBB, MI, Mips::MTLO64, Mips::MTHI64, false);
+ break;
+ case Mips::PseudoMTLOHI_DSP:
+ expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true);
+ break;
case Mips::PseudoCVT_S_W:
expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false);
break;
@@ -267,16 +303,16 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
expandCvtFPInt(MBB, MI, Mips::CVT_D64_L, Mips::DMTC1, true);
break;
case Mips::BuildPairF64:
- expandBuildPairF64(MBB, MI);
+ expandBuildPairF64(MBB, MI, false);
break;
- case Mips::ExtractElementF64:
- expandExtractElementF64(MBB, MI);
+ case Mips::BuildPairF64_64:
+ expandBuildPairF64(MBB, MI, true);
break;
- case Mips::PseudoLDC1:
- expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1);
+ case Mips::ExtractElementF64:
+ expandExtractElementF64(MBB, MI, false);
break;
- case Mips::PseudoSDC1:
- expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1);
+ case Mips::ExtractElementF64_64:
+ expandExtractElementF64(MBB, MI, true);
break;
case Mips::MIPSeh_return32:
case Mips::MIPSeh_return64:
@@ -399,6 +435,41 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc,
return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize);
}
+void MipsSEInstrInfo::expandPseudoMFHiLo(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned NewOpc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(NewOpc), I->getOperand(0).getReg());
+}
+
+void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned LoOpc,
+ unsigned HiOpc,
+ bool HasExplicitDef) const {
+ // Expand
+ // lo_hi pseudomtlohi $gpr0, $gpr1
+ // to these two instructions:
+ // mtlo $gpr0
+ // mthi $gpr1
+
+ DebugLoc DL = I->getDebugLoc();
+ const MachineOperand &SrcLo = I->getOperand(1), &SrcHi = I->getOperand(2);
+ MachineInstrBuilder LoInst = BuildMI(MBB, I, DL, get(LoOpc));
+ MachineInstrBuilder HiInst = BuildMI(MBB, I, DL, get(HiOpc));
+ LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill()));
+ HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill()));
+
+ // Add lo/hi registers if the mtlo/hi instructions created have explicit
+ // def registers.
+ if (HasExplicitDef) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo);
+ unsigned DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi);
+ LoInst.addReg(DstLo, RegState::Define);
+ HiInst.addReg(DstHi, RegState::Define);
+ }
+}
+
void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned CvtOpc, unsigned MovOpc,
@@ -408,100 +479,63 @@ void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg;
unsigned KillSrc = getKillRegState(Src.isKill());
DebugLoc DL = I->getDebugLoc();
- unsigned SubIdx = (IsI64 ? Mips::sub_32 : Mips::sub_fpeven);
bool DstIsLarger, SrcIsLarger;
tie(DstIsLarger, SrcIsLarger) = compareOpndSize(CvtOpc, *MBB.getParent());
if (DstIsLarger)
- TmpReg = getRegisterInfo().getSubReg(DstReg, SubIdx);
+ TmpReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo);
if (SrcIsLarger)
- DstReg = getRegisterInfo().getSubReg(DstReg, SubIdx);
+ DstReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo);
BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc);
BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill);
}
void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+ MachineBasicBlock::iterator I,
+ bool FP64) const {
unsigned DstReg = I->getOperand(0).getReg();
unsigned SrcReg = I->getOperand(1).getReg();
unsigned N = I->getOperand(2).getImm();
- const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1);
DebugLoc dl = I->getDebugLoc();
assert(N < 2 && "Invalid immediate");
- unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven;
+ unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo;
unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
- BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg);
+ if (SubIdx == Mips::sub_hi && FP64)
+ BuildMI(MBB, I, dl, get(Mips::MFHC1), DstReg).addReg(SubReg);
+ else
+ BuildMI(MBB, I, dl, get(Mips::MFC1), DstReg).addReg(SubReg);
}
void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+ MachineBasicBlock::iterator I,
+ bool FP64) const {
unsigned DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
const TargetRegisterInfo &TRI = getRegisterInfo();
- // mtc1 Lo, $fp
- // mtc1 Hi, $fp + 1
- BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven))
- .addReg(LoReg);
- BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd))
- .addReg(HiReg);
-}
-
-/// Add 4 to the displacement of operand MO.
-static void fixDisp(MachineOperand &MO) {
- switch (MO.getType()) {
- default:
- llvm_unreachable("Unhandled operand type.");
- case MachineOperand::MO_Immediate:
- MO.setImm(MO.getImm() + 4);
- break;
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_BlockAddress:
- case MachineOperand::MO_TargetIndex:
- case MachineOperand::MO_ExternalSymbol:
- MO.setOffset(MO.getOffset() + 4);
- break;
- }
-}
-
-void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned OpcD, unsigned OpcS) const {
- // If NoDPLoadStore is false, just change the opcode.
- if (!NoDPLoadStore) {
- genInstrWithNewOpc(OpcD, I);
- return;
- }
+ // For FP32 mode:
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ // For FP64 mode:
+ // mtc1 Lo, $fp
+ // mthc1 Hi, $fp
- // Expand a double precision FP load or store to two single precision
- // instructions.
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo))
+ .addReg(LoReg);
- const TargetRegisterInfo &TRI = getRegisterInfo();
- const MachineOperand &ValReg = I->getOperand(0);
- unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpeven);
- unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpodd);
-
- if (!TM.getSubtarget<MipsSubtarget>().isLittle())
- std::swap(LoReg, HiReg);
-
- // Create an instruction which loads from or stores to the lower memory
- // address.
- MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I);
- MIB->getOperand(0).setReg(LoReg);
-
- // Create an instruction which loads from or stores to the higher memory
- // address.
- MIB = genInstrWithNewOpc(OpcS, I);
- MIB->getOperand(0).setReg(HiReg);
- fixDisp(MIB->getOperand(2));
+ if (FP64)
+ BuildMI(MBB, I, dl, get(Mips::MTHC1), TRI.getSubReg(DstReg, Mips::sub_hi))
+ .addReg(HiReg);
+ else
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi))
+ .addReg(HiReg);
}
void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index d962ef0..6d2dd90 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -87,6 +87,13 @@ private:
std::pair<bool, bool> compareOpndSize(unsigned Opc,
const MachineFunction &MF) const;
+ void expandPseudoMFHiLo(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned NewOpc) const;
+
+ void expandPseudoMTLoHi(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned LoOpc, unsigned HiOpc,
+ bool HasExplicitDef) const;
+
/// Expand pseudo Int-to-FP conversion instructions.
///
/// For example, the following pseudo instruction
@@ -101,12 +108,9 @@ private:
unsigned CvtOpc, unsigned MovOpc, bool IsI64) const;
void expandExtractElementF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I, bool FP64) const;
void expandBuildPairF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
- void expandDPLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, unsigned OpcD,
- unsigned OpcS) const;
+ MachineBasicBlock::iterator I, bool FP64) const;
void expandEhReturn(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
};
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 286a2e2..2d44084 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -62,6 +62,24 @@ MipsSERegisterInfo::intRegClass(unsigned Size) const {
return &Mips::GPR64RegClass;
}
+/// Determine whether a given opcode is an MSA load/store (supporting 10-bit
+/// offsets) or a non-MSA load/store (supporting 16-bit offsets).
+static inline bool isMSALoadOrStore(const unsigned Opcode) {
+ switch (Opcode) {
+ case Mips::LD_B:
+ case Mips::LD_H:
+ case Mips::LD_W:
+ case Mips::LD_D:
+ case Mips::ST_B:
+ case Mips::ST_H:
+ case Mips::ST_W:
+ case Mips::ST_D:
+ return true;
+ default:
+ return false;
+ }
+}
+
void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned OpNo, int FrameIndex,
uint64_t StackSize,
@@ -111,23 +129,49 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
- // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
- // field.
- if (!MI.isDebugValue() && !isInt<16>(Offset)) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = II->getDebugLoc();
- unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned NewImm;
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(
- MBB.getParent()->getTarget().getInstrInfo());
- unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm);
- BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
- .addReg(Reg, RegState::Kill);
-
- FrameReg = Reg;
- Offset = SignExtend64<16>(NewImm);
- IsKill = true;
+ if (!MI.isDebugValue()) {
+ // Make sure Offset fits within the field available.
+ // For MSA instructions, this is a 10-bit signed immediate, otherwise it is
+ // a 16-bit signed immediate.
+ unsigned OffsetBitSize = isMSALoadOrStore(MI.getOpcode()) ? 10 : 16;
+
+ if (OffsetBitSize == 10 && !isInt<10>(Offset) && isInt<16>(Offset)) {
+ // If we have an offset that needs to fit into a signed 10-bit immediate
+ // and doesn't, but does fit into 16-bits then use an ADDiu
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ const TargetRegisterClass *RC =
+ Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+ unsigned Reg = RegInfo.createVirtualRegister(RC);
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo *>(
+ MBB.getParent()->getTarget().getInstrInfo());
+ BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset);
+
+ FrameReg = Reg;
+ Offset = 0;
+ IsKill = true;
+ } else if (!isInt<16>(Offset)) {
+ // Otherwise split the offset into 16-bit pieces and add it in multiple
+ // instructions.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned NewImm = 0;
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo *>(
+ MBB.getParent()->getTarget().getInstrInfo());
+ unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL,
+ OffsetBitSize == 16 ? &NewImm : NULL);
+ BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
+ .addReg(Reg, RegState::Kill);
+
+ FrameReg = Reg;
+ Offset = SignExtend64<16>(NewImm);
+ IsKill = true;
+ }
}
MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
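
The eliminateFI() change above reduces to a three-way decision on the frame-index offset: use it directly when it fits the MSA 10-bit field, fold it into a single ADDiu when it at least fits 16 bits, and otherwise materialize it with loadImmediate and add it with ADDu. A minimal standalone sketch of just that classification, assuming only that llvm::isInt<N>() tests for an N-bit signed immediate (isIntN below is a stand-in, not the LLVM helper):

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::isInt<N>(): true iff X fits in an N-bit signed immediate.
static bool isIntN(unsigned Bits, int64_t X) {
  return X >= -(INT64_C(1) << (Bits - 1)) && X < (INT64_C(1) << (Bits - 1));
}

// 0 = offset fits the MSA 10-bit field directly,
// 1 = fold the offset into one ADDiu first (it fits 16 bits),
// 2 = materialize the offset with loadImmediate + ADDu.
static int msaOffsetStrategy(int64_t Offset) {
  if (isIntN(10, Offset))
    return 0;
  if (isIntN(16, Offset))
    return 1;
  return 2;
}

int main() {
  std::printf("%d %d %d\n",
              msaOffsetStrategy(500),     // 0: fits the 10-bit field
              msaOffsetStrategy(1000),    // 1: needs one ADDiu
              msaOffsetStrategy(70000));  // 2: needs loadImmediate + ADDu
}

Non-MSA loads and stores skip the first case entirely, since their 16-bit immediate field already covers everything the 10-bit path would catch.
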
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 541e2ca..0a81072 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -53,6 +53,12 @@ Mips16HardFloat("mips16-hard-float", cl::NotHidden,
cl::desc("MIPS: mips16 hard float enable."),
cl::init(false));
+static cl::opt<bool>
+Mips16ConstantIslands(
+ "mips16-constant-islands", cl::Hidden,
+ cl::desc("MIPS: mips16 constant islands enable. experimental feature"),
+ cl::init(false));
+
void MipsSubtarget::anchor() { }
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
@@ -65,7 +71,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
HasBitCount(false), HasFPIdx(false),
InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
- AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+ AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
RM(_RM), OverrideMode(NoOverride), TM(_TM)
{
std::string CPUName = CPU;
@@ -89,12 +95,20 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
(hasMips64() && (isABI_N32() || isABI_N64()))) &&
"Invalid Arch & ABI pair.");
+ if (hasMSA() && !isFP64bit())
+ report_fatal_error("MSA requires a 64-bit FPU register file (FR=1 mode). "
+ "See -mattr=+fp64.",
+ false);
+
// Is the target system Linux ?
if (TT.find("linux") == std::string::npos)
IsLinux = false;
// Set UseSmallSection.
UseSmallSection = !IsLinux && (RM == Reloc::Static);
+ // set some subtarget specific features
+ if (inMips16Mode())
+ HasBitCount=false;
}
bool
@@ -152,3 +166,11 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
}
}
+bool MipsSubtarget::mipsSEUsesSoftFloat() const {
+ return TM->Options.UseSoftFloat && !InMips16HardFloat;
+}
+
+bool MipsSubtarget::useConstantIslands() {
+ DEBUG(dbgs() << "use constant islands " << Mips16ConstantIslands << "\n");
+ return Mips16ConstantIslands;
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index bfb13bb..6b2ab12 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -113,6 +113,9 @@ protected:
// compiled as Mips32
bool Os16;
+ // HasMSA -- supports MSA ASE.
+ bool HasMSA;
+
InstrItineraryData InstrItins;
// The instance to the register info section object
@@ -157,6 +160,7 @@ public:
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
+ bool isNotFP64bit() const { return !IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
bool isGP32bit() const { return !IsGP64bit; }
bool isSingleFloat() const { return IsSingleFloat; }
@@ -182,17 +186,25 @@ public:
bool inMicroMipsMode() const { return InMicroMipsMode; }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
+ bool hasMSA() const { return HasMSA; }
bool isLinux() const { return IsLinux; }
bool useSmallSection() const { return UseSmallSection; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
+ bool mipsSEUsesSoftFloat() const;
+
+ bool enableLongBranchPass() const {
+ return hasStandardEncoding() || allowMixed16_32();
+ }
+
/// Features related to the presence of specific instructions.
bool hasSEInReg() const { return HasSEInReg; }
bool hasCondMov() const { return HasCondMov; }
bool hasSwap() const { return HasSwap; }
bool hasBitCount() const { return HasBitCount; }
bool hasFPIdx() const { return HasFPIdx; }
+ bool hasExtractInsert() const { return !inMips16Mode() && hasMips32r2(); }
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
bool allowMixed16_32() const { return inMips16ModeDefault() |
@@ -200,6 +212,13 @@ public:
bool os16() const { return Os16;};
+// for now constant islands are on for the whole compilation unit but we only
+// really use them if in addition we are in mips16 mode
+//
+static bool useConstantIslands();
+
+ unsigned stackAlignment() const { return hasMips64() ? 16 : 8; }
+
// Grab MipsRegInfo object
const MipsReginfo &getMReginfo() const { return MRI; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index ced6a09..5046c1b 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -134,7 +135,13 @@ namespace {
class MipsPassConfig : public TargetPassConfig {
public:
MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ // The current implementation of long branch pass requires a scratch
+ // register ($at) to be available before branch instructions. Tail merging
+ // can break this requirement, so disable it when long branch pass is
+ // enabled.
+ EnableTailMerge = !getMipsSubtarget().enableLongBranchPass();
+ }
MipsTargetMachine &getMipsTargetMachine() const {
return getTM<MipsTargetMachine>();
@@ -160,7 +167,7 @@ void MipsPassConfig::addIRPasses() {
addPass(createMipsOs16(getMipsTargetMachine()));
if (getMipsSubtarget().inMips16HardFloat())
addPass(createMips16HardFloat(getMipsTargetMachine()));
- addPass(createMipsOptimizeMathLibCalls(getMipsTargetMachine()));
+ addPass(createPartiallyInlineLibCallsPass());
}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -196,8 +203,7 @@ bool MipsPassConfig::addPreEmitPass() {
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
addPass(createMipsDelaySlotFillerPass(TM));
- if (Subtarget.hasStandardEncoding() ||
- Subtarget.allowMixed16_32())
+ if (Subtarget.enableLongBranchPass())
addPass(createMipsLongBranchPass(TM));
if (Subtarget.inMips16Mode() ||
Subtarget.allowMixed16_32())
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
new file mode 100644
index 0000000..96966fd
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -0,0 +1,44 @@
+//===-- MipsTargetStreamer.h - Mips Target Streamer ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETSTREAMER_H
+#define MIPSTARGETSTREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class MipsTargetStreamer : public MCTargetStreamer {
+ virtual void anchor();
+
+public:
+ virtual void emitMipsHackELFFlags(unsigned Flags) = 0;
+ virtual void emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val) = 0;
+};
+
+// This part is for ascii assembly output
+class MipsTargetAsmStreamer : public MipsTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ MipsTargetAsmStreamer(formatted_raw_ostream &OS);
+ virtual void emitMipsHackELFFlags(unsigned Flags);
+ virtual void emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val);
+};
+
+// This part is for ELF object output
+class MipsTargetELFStreamer : public MipsTargetStreamer {
+public:
+ MCELFStreamer &getStreamer();
+ virtual void emitMipsHackELFFlags(unsigned Flags);
+ virtual void emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val);
+};
+}
+
+#endif
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index c7b8aa4..d5be0e4 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -277,3 +278,12 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
printOperand(MI, OpNum + 1, O);
}
}
+
+void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ assert(Op.isExpr() && "Call prototype is not an MCExpr?");
+ const MCExpr *Expr = Op.getExpr();
+ const MCSymbol &Sym = cast<MCSymbolRefExpr>(Expr)->getSymbol();
+ O << Sym.getName();
+}
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index e0f44da..93029ae 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -44,7 +44,8 @@ public:
raw_ostream &O, const char *Modifier = 0);
void printMemOperand(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = 0);
-
+ void printProtoIdent(const MCInst *MI, int OpNum,
+ raw_ostream &O, const char *Modifier = 0);
};
}
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index dfa1ff5..f2784b8 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -35,8 +35,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
PrivateGlobalPrefix = "$L__";
- AllowPeriodsInName = false;
-
HasSetDirective = false;
HasSingleParameterDotFile = false;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index a2b9bec..7552fe7 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
+#include "InstPrinter/NVPTXInstPrinter.h"
#include "cl_common_defines.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -47,21 +48,17 @@
#include <sstream>
using namespace llvm;
-bool RegAllocNilUsed = true;
-
#define DEPOTNAME "__local_depot"
static cl::opt<bool>
-EmitLineNumbers("nvptx-emit-line-numbers",
+EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
cl::init(true));
-namespace llvm { bool InterleaveSrcInPtx = false; }
-
-static cl::opt<bool, true>
-InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
+static cl::opt<bool>
+InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specific: Emit source line in ptx file"),
- cl::location(llvm::InterleaveSrcInPtx));
+ cl::init(false));
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
@@ -129,7 +126,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+ return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
@@ -294,7 +291,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
return;
// Emit the line from the source file.
- if (llvm::InterleaveSrcInPtx)
+ if (InterleaveSrc)
this->emitSrcInText(fileName.str(), curLoc.getLine());
std::stringstream temp;
@@ -317,6 +314,14 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
OutMI.setOpcode(MI->getOpcode());
+ // Special: Do not mangle symbol operand of CALL_PROTOTYPE
+ if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
+ const MachineOperand &MO = MI->getOperand(0);
+ OutMI.addOperand(GetSymbolRef(MO,
+ OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
+ return;
+ }
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -344,7 +349,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_FPImmediate: {
const ConstantFP *Cnt = MO.getFPImm();
@@ -550,6 +555,19 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
VRegMapping.clear();
}
+void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
+ unsigned RegNo = MI->getOperand(0).getReg();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ if (TRI->isVirtualRegister(RegNo)) {
+ OutStreamer.AddComment(Twine("implicit-def: ") +
+ getVirtualRegisterName(RegNo));
+ } else {
+ OutStreamer.AddComment(Twine("implicit-def: ") +
+ TM.getRegisterInfo()->getName(RegNo));
+ }
+ OutStreamer.AddBlankLine();
+}
+
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
raw_ostream &O) const {
// If the NVVM IR has some of reqntid* specified, then output
@@ -601,23 +619,30 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
O << ".minnctapersm " << mincta << "\n";
}
-void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
- raw_ostream &O) {
- const TargetRegisterClass *RC = MRI->getRegClass(vr);
+std::string
+NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
- unsigned mapped_vr = regmap[vr];
+ std::string Name;
+ raw_string_ostream NameStr(Name);
- if (!isVec) {
- O << getNVPTXRegClassStr(RC) << mapped_vr;
- return;
- }
- report_fatal_error("Bad register!");
+ VRegRCMap::const_iterator I = VRegMapping.find(RC);
+ assert(I != VRegMapping.end() && "Bad register class");
+ const DenseMap<unsigned, unsigned> &RegMap = I->second;
+
+ VRegMap::const_iterator VI = RegMap.find(Reg);
+ assert(VI != RegMap.end() && "Bad virtual register");
+ unsigned MappedVR = VI->second;
+
+ NameStr << getNVPTXRegClassStr(RC) << MappedVR;
+
+ NameStr.flush();
+ return Name;
}
-void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
raw_ostream &O) {
- getVirtualRegisterName(vr, isVec, O);
+ O << getVirtualRegisterName(vr);
}
void NVPTXAsmPrinter::printVecModifiedImmediate(
@@ -660,7 +685,7 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
else
O << ".func ";
printReturnValStr(F, O);
- O << *Mang->getSymbol(F) << "\n";
+ O << *getSymbol(F) << "\n";
emitFunctionParamList(F, O);
O << ";\n";
}
@@ -870,7 +895,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(OutContext, &TM);
+ Mang = new Mangler(&TM);
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1);
@@ -1190,7 +1215,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
else
O << getPTXFundamentalTypeStr(ETy, false);
O << " ";
- O << *Mang->getSymbol(GVar);
+ O << *getSymbol(GVar);
// Ptx allows variable initilization only for constant and global state
// spaces.
@@ -1226,15 +1251,15 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
if (nvptxSubtarget.is64Bit()) {
- O << " .u64 " << *Mang->getSymbol(GVar) << "[";
+ O << " .u64 " << *getSymbol(GVar) << "[";
O << ElementSize / 8;
} else {
- O << " .u32 " << *Mang->getSymbol(GVar) << "[";
+ O << " .u32 " << *getSymbol(GVar) << "[";
O << ElementSize / 4;
}
O << "]";
} else {
- O << " .b8 " << *Mang->getSymbol(GVar) << "[";
+ O << " .b8 " << *getSymbol(GVar) << "[";
O << ElementSize;
O << "]";
}
@@ -1242,7 +1267,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
aggBuffer.print();
O << "}";
} else {
- O << " .b8 " << *Mang->getSymbol(GVar);
+ O << " .b8 " << *getSymbol(GVar);
if (ElementSize) {
O << "[";
O << ElementSize;
@@ -1250,7 +1275,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
}
} else {
- O << " .b8 " << *Mang->getSymbol(GVar);
+ O << " .b8 " << *getSymbol(GVar);
if (ElementSize) {
O << "[";
O << ElementSize;
@@ -1357,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << " .";
O << getPTXFundamentalTypeStr(ETy);
O << " ";
- O << *Mang->getSymbol(GVar);
+ O << *getSymbol(GVar);
return;
}
@@ -1372,7 +1397,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
case Type::ArrayTyID:
case Type::VectorTyID:
ElementSize = TD->getTypeStoreSize(ETy);
- O << " .b8 " << *Mang->getSymbol(GVar) << "[";
+ O << " .b8 " << *getSymbol(GVar) << "[";
if (ElementSize) {
O << itostr(ElementSize);
}
@@ -1427,7 +1452,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
(nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
- O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex;
+ O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
else {
std::string argName = I->getName();
const char *p = argName.c_str();
@@ -1486,13 +1511,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (llvm::isImage(*I)) {
std::string sname = I->getName();
if (llvm::isImageWriteOnly(*I))
- O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_"
+ O << "\t.param .surfref " << *getSymbol(F) << "_param_"
<< paramIndex;
else // Default image is read_only
- O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_"
+ O << "\t.param .texref " << *getSymbol(F) << "_param_"
<< paramIndex;
} else // Should be llvm::isSampler(*I)
- O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_"
+ O << "\t.param .samplerref " << *getSymbol(F) << "_param_"
<< paramIndex;
continue;
}
@@ -1739,13 +1764,13 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
return;
}
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- O << *Mang->getSymbol(GVar);
+ O << *getSymbol(GVar);
return;
}
if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
- O << *Mang->getSymbol(GVar);
+ O << *getSymbol(GVar);
return;
} else {
O << *LowerConstant(CPV, *this);
@@ -1863,7 +1888,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::VectorTyID:
case Type::StructTyID: {
if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
- isa<ConstantStruct>(CPV)) {
+ isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
int ElementSize = TD->getTypeAllocSize(CPV->getType());
bufferAggregateConstant(CPV, aggBuffer);
if (Bytes > ElementSize)
@@ -1993,6 +2018,116 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
return false;
}
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
+ const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MO.getReg() == NVPTX::VRDepot)
+ O << DEPOTNAME << getFunctionNumber();
+ else
+ O << NVPTXInstPrinter::getRegisterName(MO.getReg());
+ } else {
+ emitVirtualRegister(MO.getReg(), O);
+ }
+ return;
+
+ case MachineOperand::MO_Immediate:
+ if (!Modifier)
+ O << MO.getImm();
+ else if (strstr(Modifier, "vec") == Modifier)
+ printVecModifiedImmediate(MO, Modifier, O);
+ else
+ llvm_unreachable(
+ "Don't know how to handle modifier on immediate operand");
+ return;
+
+ case MachineOperand::MO_FPImmediate:
+ printFPConstant(MO.getFPImm(), O);
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *symbname = MO.getSymbolName();
+ if (strstr(symbname, ".PARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname + 6, "%u[];", &index);
+ printParamName(index, O);
+ } else if (strstr(symbname, ".HLPPARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname + 9, "%u[];", &index);
+ O << *CurrentFnSym << "_param_" << index << "_offset";
+ } else
+ O << symbname;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ default:
+ llvm_unreachable("Operand type not supported.");
+ }
+}
+
+void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ if (Modifier && !strcmp(Modifier, "add")) {
+ O << ", ";
+ printOperand(MI, opNum + 1, O);
+ } else {
+ if (MI->getOperand(opNum + 1).isImm() &&
+ MI->getOperand(opNum + 1).getImm() == 0)
+ return; // don't print ',0' or '+0'
+ O << "+";
+ printOperand(MI, opNum + 1, O);
+ }
+}
+
+
// Force static initialization.
extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 27bfa54..3abe5d1 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -155,7 +155,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (pos == nextSymbolPos) {
const Value *v = Symbols[nSym];
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
- MCSymbol *Name = AP.Mang->getSymbol(GVar);
+ MCSymbol *Name = AP.getSymbol(GVar);
O << *Name;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
@@ -188,6 +188,7 @@ private:
void EmitFunctionEntryLabel();
void EmitFunctionBodyStart();
void EmitFunctionBodyEnd();
+ void emitImplicitDef(const MachineInstr *MI) const;
void EmitInstruction(const MachineInstr *);
void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
@@ -213,7 +214,7 @@ private:
void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
- void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
+ void emitVirtualRegister(unsigned int vr, raw_ostream &);
void emitFunctionExternParamList(const MachineFunction &MF);
void emitFunctionParamList(const Function *, raw_ostream &O);
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
@@ -222,7 +223,14 @@ private:
bool isImageType(const Type *Ty);
void printReturnValStr(const Function *, raw_ostream &O);
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
-
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
protected:
bool doInitialization(Module &M);
bool doFinalization(Module &M);
@@ -287,7 +295,7 @@ public:
bool ignoreLoc(const MachineInstr &);
- virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);
+ std::string getVirtualRegisterName(unsigned) const;
DebugLoc prevDebugLoc;
void emitLineNumberAsDotLoc(const MachineInstr &);
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 9f92a5b..9fb0dd8 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -142,7 +142,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
GlobalVariable *GV = I->first;
GlobalVariable *NewGV = I->second;
++I;
- Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType());
+ Constant *BitCastNewGV = ConstantExpr::getPointerCast(NewGV, GV->getType());
// At this point, the remaining uses of GV should be found only in global
// variable initializers, as other uses have been already been removed
// while walking through the instructions in function definitions.
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ba85e35..4b8b306 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -26,24 +26,24 @@
using namespace llvm;
static cl::opt<int>
-FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
+FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
" 1: do it 2: do it aggressively"),
cl::init(2));
static cl::opt<int> UsePrecDivF32(
- "nvptx-prec-divf32", cl::ZeroOrMore,
+ "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
" IEEE Compliant F32 div.rnd if avaiable."),
cl::init(2));
static cl::opt<bool>
-UsePrecSqrtF32("nvptx-prec-sqrtf32",
+UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
cl::init(true));
static cl::opt<bool>
-FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore,
+FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
cl::init(false));
@@ -118,8 +118,10 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const {
/// expanded, promoted and normal instructions.
SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
SDNode *ResNode = NULL;
switch (N->getOpcode()) {
@@ -249,7 +251,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
SDValue Addr;
SDValue Offset, Base;
unsigned Opcode;
- MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
+ MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N1, Addr)) {
switch (TargetVT) {
@@ -1347,8 +1349,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
SDValue Addr;
SDValue Offset, Base;
unsigned Opcode;
- MVT::SimpleValueType SourceVT =
- N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
+ MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N2, Addr)) {
switch (SourceVT) {
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 828242d..6a8be75 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -310,6 +310,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::CallSeqBegin";
case NVPTXISD::CallSeqEnd:
return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::CallPrototype:
+ return "NVPTXISD::CallPrototype";
case NVPTXISD::LoadV2:
return "NVPTXISD::LoadV2";
case NVPTXISD::LoadV4:
@@ -471,22 +473,47 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
Type *Ty,
unsigned Idx) const {
const DataLayout *TD = getDataLayout();
- unsigned align = 0;
- GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
+ unsigned Align = 0;
+ const Value *DirectCallee = CS->getCalledFunction();
+
+ if (!DirectCallee) {
+ // We don't have a direct function symbol, but that may be because of
+ // constant cast instructions in the call.
+ const Instruction *CalleeI = CS->getInstruction();
+ assert(CalleeI && "Call target is not a function or derived value?");
+
+ // With bitcast'd call targets, the instruction will be the call
+ if (isa<CallInst>(CalleeI)) {
+ // Check if we have call alignment metadata
+ if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
+ return Align;
+
+ const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
+ // Ignore any bitcast instructions
+ while(isa<ConstantExpr>(CalleeV)) {
+ const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
+ if (!CE->isCast())
+ break;
+ // Look through the bitcast
+ CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
+ }
- if (Func) { // direct call
- assert(CS->getCalledFunction() &&
- "direct call cannot find callee");
- if (!llvm::getAlign(*(CS->getCalledFunction()), Idx, align))
- align = TD->getABITypeAlignment(Ty);
- }
- else { // indirect call
- const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
- if (!llvm::getAlign(*CallI, Idx, align))
- align = TD->getABITypeAlignment(Ty);
+ // We have now looked past all of the bitcasts. Do we finally have a
+ // Function?
+ if (isa<Function>(CalleeV))
+ DirectCallee = CalleeV;
+ }
}
- return align;
+ // Check for function alignment information if we found that the
+ // ultimate target is a Function
+ if (DirectCallee)
+ if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
+ return Align;
+
+ // Call is indirect or alignment information is not available, fall back to
+ // the ABI type alignment
+ return TD->getABITypeAlignment(Ty);
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -860,18 +887,16 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
// to be emitted, and the label has to used as the last arg of call
// instruction.
- // The prototype is embedded in a string and put as the operand for an
- // INLINEASM SDNode.
- SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string proto_string =
- getPrototype(retTy, Args, Outs, retAlignment, CS);
- const char *asmstr = nvTM->getManagedStrPool()
- ->getManagedString(proto_string.c_str())->c_str();
- SDValue InlineAsmOps[] = {
- Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
- DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
+ // The prototype is embedded in a string and put as the operand for a
+ // CallPrototype SDNode which will print out to the value of the string.
+ SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
+ const char *ProtoStr =
+ nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
+ SDValue ProtoOps[] = {
+ Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
};
- Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
+ Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, &ProtoOps[0], 3);
InFlag = Chain.getValue(1);
}
// Op to just print "call"
@@ -1595,7 +1620,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
}
Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
- InsIdx += VecSize;
+ InsIdx += NumElts;
}
if (NumElts > 0)
@@ -2263,3 +2288,29 @@ void NVPTXTargetLowering::ReplaceNodeResults(
return;
}
}
+
+// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
+void NVPTXSection::anchor() {}
+
+NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
+ delete TextSection;
+ delete DataSection;
+ delete BSSSection;
+ delete ReadOnlySection;
+
+ delete StaticCtorSection;
+ delete StaticDtorSection;
+ delete LSDASection;
+ delete EHFrameSection;
+ delete DwarfAbbrevSection;
+ delete DwarfInfoSection;
+ delete DwarfLineSection;
+ delete DwarfFrameSection;
+ delete DwarfPubTypesSection;
+ delete DwarfDebugInlineSection;
+ delete DwarfStrSection;
+ delete DwarfLocSection;
+ delete DwarfARangesSection;
+ delete DwarfRangesSection;
+ delete DwarfMacroInfoSection;
+}
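
The getArgumentAlignment() rewrite above first consults call-site alignment metadata and then tries to recover a direct callee by peeling constant cast expressions off the called value. A minimal sketch of just the cast-peeling step, assuming LLVM 3.4-era IR headers (findDirectCallee is a hypothetical helper, not part of this patch):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Strip constant cast expressions from a call target; returns the underlying
// Function for a bitcast'd direct call, or null for a genuinely indirect one.
static const Function *findDirectCallee(const CallInst *CI) {
  const Value *V = CI->getCalledValue();
  while (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (!CE->isCast())
      break;                 // some other constant expression; give up
    V = CE->getOperand(0);   // look through the cast
  }
  return dyn_cast<Function>(V);
}

When this yields a Function, llvm::getAlign() on it can supply the parameter alignment; otherwise the code falls back to the ABI type alignment, exactly as in the hunk above.
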
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 3418437..66e708f 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -49,6 +49,7 @@ enum NodeType {
RETURN,
CallSeqBegin,
CallSeqEnd,
+ CallPrototype,
Dummy,
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index b406aa9..86ddd38 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -14,17 +14,19 @@
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "NVPTXGenInstrInfo.inc"
#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include <cstdio>
using namespace llvm;
+// Pin the vtable to this file.
+void NVPTXInstrInfo::anchor() {}
+
// FIXME: Add the subtarget support on this constructor.
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
: NVPTXGenInstrInfo(), TM(tm), RegInfo(*TM.getSubtargetImpl()) {}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index b1972e9..600fc5c 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -26,6 +26,7 @@ namespace llvm {
class NVPTXInstrInfo : public NVPTXGenInstrInfo {
NVPTXTargetMachine &TM;
const NVPTXRegisterInfo RegInfo;
+ virtual void anchor();
public:
explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 3e430bf..b23f1e4 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2607,6 +2607,20 @@ def trapinst : NVPTXInst<(outs), (ins),
"trap;",
[(trap)]>;
+// Call prototype wrapper
+def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def CallPrototype
+ : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def ProtoIdent : Operand<i32> {
+ let PrintMethod = "printProtoIdent";
+}
+def CALL_PROTOTYPE
+ : NVPTXInst<(outs), (ins ProtoIdent:$ident),
+ "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
+
+
+
include "NVPTXIntrinsics.td"
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index e57ace9..f8a692e 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -24,10 +24,10 @@ namespace llvm {
/// the ASMPrint interface.
///
class NVPTXSection : public MCSection {
-
+ virtual void anchor();
public:
NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
- ~NVPTXSection() {}
+ virtual ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
/// to a section.
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index 83dfe12..b64c308 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -36,7 +36,7 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
BasicBlock::iterator II = IB;
BasicBlock::iterator IE = BI->end();
- // Skit the first intruction. No splitting is needed at this
+  // Skip the first instruction. No splitting is needed at this
// point even if this is a bar.
while (II != IE) {
if (IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II)) {
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index c4d0d6e..9771a17 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -20,6 +20,9 @@
using namespace llvm;
+// Pin the vtable to this file.
+void NVPTXSubtarget::anchor() {}
+
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
: NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 670077d..004be11 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -25,7 +25,7 @@
namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
-
+ virtual void anchor();
std::string TargetName;
NVPTX::DrvInterface drvInterface;
bool Is64Bit;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 72afe8d..46edd6d 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -57,9 +57,6 @@ extern "C" void LLVMInitializeNVPTXTarget() {
RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
-
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
@@ -129,6 +126,7 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
disablePass(&BranchFolderPassID);
+ disablePass(&TailDuplicateID);
TargetPassConfig::addIRPasses();
addPass(createGenericToNVVMPass());
@@ -154,10 +152,30 @@ FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
assert(!RegAllocPass && "NVPTX uses no regalloc!");
- addPass(&StrongPHIEliminationID);
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
}
void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
assert(!RegAllocPass && "NVPTX uses no regalloc!");
- addPass(&StrongPHIEliminationID);
+
+ addPass(&ProcessImplicitDefsID);
+ addPass(&LiveVariablesID);
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
+
+ addPass(&TwoAddressInstructionPassID);
+ addPass(&RegisterCoalescerID);
+
+ // PreRA instruction scheduling.
+ if (addPass(&MachineSchedulerID))
+ printAndVerify("After Machine Scheduling");
+
+
+ addPass(&StackSlotColoringID);
+
+ // FIXME: Needs physical registers
+ //addPass(&PostRAMachineLICMID);
+
+ printAndVerify("After StackSlotColoring");
}
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index bfd6ab1..2a7394b 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -44,30 +44,10 @@ public:
DwarfMacroInfoSection = 0;
}
- ~NVPTXTargetObjectFile() {
- delete TextSection;
- delete DataSection;
- delete BSSSection;
- delete ReadOnlySection;
-
- delete StaticCtorSection;
- delete StaticDtorSection;
- delete LSDASection;
- delete EHFrameSection;
- delete DwarfAbbrevSection;
- delete DwarfInfoSection;
- delete DwarfLineSection;
- delete DwarfFrameSection;
- delete DwarfPubTypesSection;
- delete DwarfDebugInlineSection;
- delete DwarfStrSection;
- delete DwarfLocSection;
- delete DwarfARangesSection;
- delete DwarfRangesSection;
- delete DwarfMacroInfoSection;
- }
+ virtual ~NVPTXTargetObjectFile();
virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(ctx, TM);
TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
DataSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 3cc324b..7406207 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -79,7 +79,7 @@ ModulePass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping) {
}
static cl::opt<bool>
-NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
+NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
cl::desc("NVVM reflection, enabled by default"));
char NVVMReflect::ID = 0;
@@ -88,7 +88,7 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
false)
static cl::list<std::string>
-ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"),
+ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), cl::Hidden,
cl::desc("A list of string=num assignments"),
cl::ValueRequired);
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index a8f7509..fe83fe1 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
@@ -174,6 +175,7 @@ struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCInstrInfo &MII;
bool IsPPC64;
MCAsmParser &getParser() const { return Parser; }
@@ -218,8 +220,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &_MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -235,6 +238,10 @@ public:
virtual bool ParseDirective(AsmToken DirectiveID);
unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
+
+ virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind,
+ MCContext &Ctx);
};
/// PPCOperand - Instances of this class represent a parsed PowerPC machine
@@ -900,19 +907,19 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
RegNo = PPC::VRSAVE;
IntVal = 256;
return false;
- } else if (Name.substr(0, 1).equals_lower("r") &&
+ } else if (Name.startswith_lower("r") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
return false;
- } else if (Name.substr(0, 1).equals_lower("f") &&
+ } else if (Name.startswith_lower("f") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = FRegs[IntVal];
return false;
- } else if (Name.substr(0, 1).equals_lower("v") &&
+ } else if (Name.startswith_lower("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
return false;
- } else if (Name.substr(0, 2).equals_lower("cr") &&
+ } else if (Name.startswith_lower("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];
return false;
@@ -1353,6 +1360,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
switch (Kind) {
case MCK_0: ImmVal = 0; break;
case MCK_1: ImmVal = 1; break;
+ case MCK_2: ImmVal = 2; break;
+ case MCK_3: ImmVal = 3; break;
default: return Match_InvalidOperand;
}
@@ -1363,3 +1372,26 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
return Match_InvalidOperand;
}
+const MCExpr *
+PPCAsmParser::applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant,
+ MCContext &Ctx) {
+ switch (Variant) {
+ case MCSymbolRefExpr::VK_PPC_LO:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HI:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
+ default:
+ return 0;
+ }
+}
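The equals_lower/startswith_lower change above is a drop-in simplification of the prefix test; the numeric suffix is still parsed and range-checked separately. A self-contained sketch of that matching logic, using plain std::string instead of the real StringRef API and an invented helper name:

#include <cassert>
#include <cctype>
#include <string>

// Case-insensitive prefix match followed by parsing the numeric suffix,
// mirroring the startswith_lower + getAsInteger pattern used above.
static bool parsePrefixedReg(const std::string &Name, const std::string &Prefix,
                             unsigned Limit, int64_t &IntVal) {
  if (Name.size() <= Prefix.size())
    return false;
  for (size_t i = 0; i != Prefix.size(); ++i)
    if (std::tolower(static_cast<unsigned char>(Name[i])) !=
        std::tolower(static_cast<unsigned char>(Prefix[i])))
      return false;
  const std::string Suffix = Name.substr(Prefix.size());
  for (char C : Suffix)
    if (!std::isdigit(static_cast<unsigned char>(C)))
      return false;
  IntVal = std::stoll(Suffix);
  return IntVal < Limit;
}

int main() {
  int64_t N;
  assert(parsePrefixedReg("R31", "r", 32, N) && N == 31);
  assert(parsePrefixedReg("cr7", "cr", 8, N) && N == 7);
  assert(!parsePrefixedReg("cr9", "cr", 8, N)); // out of range
  return 0;
}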
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 08d7665..8281b5c 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -18,9 +18,17 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
+// FIXME: Once the integrated assembler supports full register names, tie this
+// to the verbose-asm setting.
+static cl::opt<bool>
+FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
+ cl::desc("Use full register names when printing assembly"));
+
#include "PPCGenAsmWriter.inc"
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
@@ -78,6 +86,17 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
}
+ // For fast-isel, a COPY_TO_REGCLASS may survive this long. This is
+ // used when converting a 32-bit float to a 64-bit float as part of
+ // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()),
+ // as otherwise we have problems with incorrect register classes
+  // in machine instruction verification. For now, just avoid printing it,
+  // since such an instruction has no effect (a 32-bit float
+ // in a register is already in 64-bit form, just with lower
+ // precision). FIXME: Is there a better solution?
+ if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -285,6 +304,9 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
/// stripRegisterPrefix - This method strips the character prefix from a
/// register name so that only the number is left. Used for Linux asm.
static const char *stripRegisterPrefix(const char *RegName) {
+ if (FullRegNames)
+ return RegName;
+
switch (RegName[0]) {
case 'r':
case 'f':
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index 45be471..3efa5ec 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMPowerPCDesc
PPCMCCodeEmitter.cpp
PPCMCExpr.cpp
PPCPredicates.cpp
+ PPCMachObjectWriter.cpp
PPCELFObjectWriter.cpp
)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index b2a8701..0d42081 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -16,9 +16,9 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -69,19 +69,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
namespace {
-class PPCMachObjectWriter : public MCMachObjectTargetWriter {
-public:
- PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
-
- void RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {
- llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
- }
-};
class PPCAsmBackend : public MCAsmBackend {
const Target &TheTarget;
@@ -145,14 +132,17 @@ public:
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- // Can't emit NOP with size not multiple of 32-bits
- if (Count % 4 != 0)
- return false;
-
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write32(0x60000000);
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0); break;
+ }
+
return true;
}
@@ -174,12 +164,11 @@ namespace {
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
- return createMachObjectWriter(new PPCMachObjectWriter(
- /*Is64Bit=*/is64,
- (is64 ? object::mach::CTM_PowerPC64 :
- object::mach::CTM_PowerPC),
- object::mach::CSPPC_ALL),
- OS, /*IsLittleEndian=*/false);
+ return createPPCMachObjectWriter(
+ OS,
+ /*Is64Bit=*/is64,
+ (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+ MachO::CPU_SUBTYPE_POWERPC_ALL);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -206,10 +195,9 @@ namespace {
} // end anonymous namespace
-
-
-
-MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
if (Triple(TT).isOSDarwin())
return new DarwinPPCAsmBackend(T);
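For reference, a minimal self-contained sketch of the padding behaviour the writeNopData change above introduces: emit as many 4-byte PPC NOPs as fit, then pad the leftover 1-3 bytes with zeros instead of rejecting the request. ByteWriter is a hypothetical stand-in for MCObjectWriter, not an LLVM API.

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative byte sink; the real code writes through MCObjectWriter.
struct ByteWriter {
  std::vector<uint8_t> Bytes;
  void write8(uint8_t V) { Bytes.push_back(V); }
  void write16(uint16_t V) { write8(V >> 8); write8(V & 0xff); }
  void write32(uint32_t V) { write16(V >> 16); write16(V & 0xffff); }
};

// Mirrors the patched writeNopData: 0x60000000 is the PPC NOP encoding.
static bool writeNopData(uint64_t Count, ByteWriter &OW) {
  uint64_t NumNops = Count / 4;
  for (uint64_t i = 0; i != NumNops; ++i)
    OW.write32(0x60000000);

  switch (Count % 4) {
  default: break;               // no leftover bytes to write
  case 1: OW.write8(0); break;
  case 2: OW.write16(0); break;
  case 3: OW.write16(0); OW.write8(0); break;
  }
  return true;
}

int main() {
  ByteWriter W;
  writeNopData(7, W);           // one NOP plus three zero bytes
  assert(W.Bytes.size() == 7);
  return 0;
}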
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 6822507..f3dddce 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -22,7 +22,6 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
}
IsLittleEndian = false;
- PCSymbol = ".";
CommentString = ";";
ExceptionsType = ExceptionHandling::DwarfCFI;
@@ -47,15 +46,14 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
CommentString = "#";
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
-
+
// Uses '.section' before '.bss' directive
UsesELFSectionDirectiveForBSS = true;
// Debug Information
SupportsDebugInformation = true;
- PCSymbol = ".";
+ DollarIsPC = true;
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 7b4ed9f..1530e77 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -15,6 +15,7 @@
#define PPCTARGETASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
@@ -24,7 +25,7 @@ namespace llvm {
explicit PPCMCAsmInfoDarwin(bool is64Bit);
};
- class PPCLinuxMCAsmInfo : public MCAsmInfo {
+ class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit PPCLinuxMCAsmInfo(bool is64Bit);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 59ba9c4..346a9be 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
@@ -76,11 +77,17 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
+ // For fast-isel, a float COPY_TO_REGCLASS can survive this long.
+ // It's just a nop to keep the register classes happy, so don't
+ // generate anything.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
// BL8_NOP etc. all have a size of 8 because of the following 'nop'.
unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
- unsigned Opcode = MI.getOpcode();
if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
Opcode == PPC::BL8_NOP_TLS)
Size = 8;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 9529267..d7e8402 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -54,7 +54,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const {
MCValue Value;
- if (!getSubExpr()->EvaluateAsRelocatable(Value, *Layout))
+ if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout))
return false;
if (Value.isAbsolute()) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5f7a39a..f18d095 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -14,13 +14,16 @@
#include "PPCMCTargetDesc.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "PPCMCAsmInfo.h"
+#include "PPCTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -34,6 +37,9 @@
using namespace llvm;
+// Pin the vtable to this file.
+PPCTargetStreamer::~PPCTargetStreamer() {}
+
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitPPCMCInstrInfo(X);
@@ -101,6 +107,29 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
+namespace {
+class PPCTargetAsmStreamer : public PPCTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {}
+ virtual void emitTCEntry(const MCSymbol &S) {
+ OS << "\t.tc ";
+ OS << S.getName();
+ OS << "[TC],";
+ OS << S.getName();
+ OS << '\n';
+ }
+};
+
+class PPCTargetELFStreamer : public PPCTargetStreamer {
+ virtual void emitTCEntry(const MCSymbol &S) {
+ // Creates a R_PPC64_TOC relocation
+ Streamer->EmitSymbolValue(&S, 8);
+ }
+};
+}
+
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
@@ -111,7 +140,20 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (Triple(TT).isOSDarwin())
return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ PPCTargetStreamer *S = new PPCTargetELFStreamer();
+ return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCStreamer *
+createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory, MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) {
+ PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS);
+
+ return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, InstPrint, CE, TAB,
+ ShowInst);
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
@@ -171,6 +213,11 @@ extern "C" void LLVMInitializePowerPCTargetMC() {
TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
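A small sketch of the text form that PPCTargetAsmStreamer::emitTCEntry above writes for a TOC entry; the symbol name and the plain std::string stream are illustrative only, not the streamer's actual plumbing.

#include <iostream>
#include <sstream>
#include <string>

// Builds the same directive the asm streamer above emits for a TOC entry:
//   \t.tc <name>[TC],<name>\n
static std::string buildTCEntry(const std::string &SymName) {
  std::ostringstream OS;
  OS << "\t.tc " << SymName << "[TC]," << SymName << '\n';
  return OS.str();
}

int main() {
  // For a hypothetical symbol "foo" this prints:  .tc foo[TC],foo
  std::cout << buildTCEntry("foo");
  return 0;
}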
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 9f29132..0b0ca24 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -40,12 +40,17 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint8_t OSABI);
+/// createPPCMachObjectWriter - Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
new file mode 100644
index 0000000..bbafe2e
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -0,0 +1,389 @@
+//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MachO.h"
+
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue);
+
+ void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ if (Writer->is64Bit()) {
+ report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
+ } else
+ RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+/// computes the log2 of the size of the relocation,
+/// used for relocation_info::r_length.
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ report_fatal_error("log2size(FixupKind): Unhandled fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1:
+ return 0;
+ case FK_PCRel_2:
+ case FK_Data_2:
+ return 1;
+ case FK_PCRel_4:
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_br24:
+ case FK_Data_4:
+ return 2;
+ case FK_PCRel_8:
+ case FK_Data_8:
+ return 3;
+ }
+ return 0;
+}
+
+/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
+/// Outline based on PPCELFObjectWriter::getRelocTypeInner().
+static unsigned getRelocType(const MCValue &Target,
+ const MCFixupKind FixupKind, // from
+ // Fixup.getKind()
+ const bool IsPCRel) {
+ const MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None
+ : Target.getSymA()->getKind();
+ // determine the type of the relocation
+ unsigned Type = MachO::GENERIC_RELOC_VANILLA;
+ if (IsPCRel) { // relative to PC
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (relative)");
+ case PPC::fixup_ppc_br24:
+ Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24
+ break;
+ case PPC::fixup_ppc_brcond14:
+ Type = MachO::PPC_RELOC_BR14;
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (absolute)!");
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16_SECTDIFF;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ break;
+ case FK_Data_2:
+ break;
+ }
+ }
+ return Type;
+}
+
+static void makeRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t FixupOffset, const uint32_t Index,
+ const unsigned IsPCRel, const unsigned Log2Size,
+ const unsigned IsExtern, const unsigned Type) {
+ MRE.r_word0 = FixupOffset;
+ // The bitfield offsets that work (as determined by trial-and-error)
+ // are different than what is documented in the mach-o manuals.
+ // This appears to be an endianness issue; reversing the order of the
+ // documented bitfields in <llvm/Support/MachO.h> fixes this (but
+ // breaks x86/ARM assembly).
+ MRE.r_word1 = ((Index << 8) | // was << 0
+ (IsPCRel << 7) | // was << 24
+ (Log2Size << 5) | // was << 25
+ (IsExtern << 4) | // was << 27
+ (Type << 0)); // was << 28
+}
+
+static void
+makeScatteredRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t Addr, const unsigned Type,
+ const unsigned Log2Size, const unsigned IsPCRel,
+ const uint32_t Value2) {
+ // For notes on bitfield positions and endianness, see:
+ // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry
+ MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) |
+ (IsPCRel << 30) | MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
+}
+
+/// Compute fixup offset (address).
+static uint32_t getFixupOffset(const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+  // On Mach-O, fixup_ppc_half16 relocations must refer to the start of the
+  // instruction, not to the second halfword as they do on ELF.
+ if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16)
+ FixupOffset &= ~uint32_t(3);
+ return FixupOffset;
+}
+
+/// \return false if falling back to using non-scattered relocation,
+/// otherwise true for normal scattered relocation.
+/// based on X86MachObjectWriter::RecordScatteredRelocation
+/// and ARMMachObjectWriter::RecordScatteredRelocation
+bool PPCMachObjectWriter::RecordScatteredRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue) {
+ // caller already computes these, can we just pass and reuse?
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ const MCFixupKind FK = Fixup.getKind();
+ const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned Type = getRelocType(Target, FK, IsPCRel);
+
+ // Is this a local or SECTDIFF relocation entry?
+ // SECTDIFF relocation entries have symbol subtractions,
+ // and require two entries, the first for the add-symbol value,
+ // the second for the subtract-symbol value.
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // FIXME: is Type correct? see include/llvm/Support/MachO.h
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+ // FIXME: does FixedValue get used??
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == MachO::PPC_RELOC_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HI16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HA16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO14_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) {
+ // X86 had this piece, but ARM does not
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") +
+ Buffer + ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
+ // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
+ // see PPCMCExpr::EvaluateAsRelocatableImpl()
+ uint32_t other_half = 0;
+ switch (Type) {
+ case MachO::PPC_RELOC_LO16_SECTDIFF:
+ other_half = (FixedValue >> 16) & 0xffff;
+      // applyFixupOffset no longer extracts the high part because it now assumes
+ // this was already done.
+ // It looks like this is not true for the FixedValue needed with Mach-O
+ // relocs.
+ // So we need to adjust FixedValue again here.
+ FixedValue &= 0xffff;
+ break;
+ case MachO::PPC_RELOC_HA16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue =
+ ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 1 : 0)) & 0xffff;
+ break;
+ case MachO::PPC_RELOC_HI16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue = (FixedValue >> 16) & 0xffff;
+ break;
+ default:
+ llvm_unreachable("Invalid PPC scattered relocation type.");
+ break;
+ }
+
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
+ Log2Size, IsPCRel, Value2);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
+ }
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
+}
+
+// see PPCELFObjectWriter for a general outline of cases
+void PPCMachObjectWriter::RecordPPCRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ const MCFixupKind FK = Fixup.getKind(); // unsigned
+ const unsigned Log2Size = getFixupKindLog2Size(FK);
+ const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned RelocType = getRelocType(Target, FK, IsPCRel);
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB() &&
+ // Q: are branch targets ever scattered?
+ RelocType != MachO::PPC_RELOC_BR24 &&
+ RelocType != MachO::PPC_RELOC_BR14) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ Log2Size, FixedValue);
+ return;
+ }
+
+ // this doesn't seem right for RIT_PPC_BR24
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // See <reloc.h>.
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = RelocType;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ // the above line stolen from ARM, not sure
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD =
+ Asm.getSectionData(SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+ }
+
+ // struct relocation_info (8 bytes)
+ MachO::any_relocation_info MRE;
+ makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
+ Type);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(
+ new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
+ /*IsLittleEndian=*/false);
+}
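The new PPCMachObjectWriter above encodes relocations two ways: plain entries packed by makeRelocationInfo, and scattered SECTDIFF entries whose half16 values are split into @ha/@l halves. A minimal standalone sketch of both computations follows; the sample values are invented and the struct is a stand-in for MachO::any_relocation_info.

#include <cassert>
#include <cstdint>

// Stand-in for MachO::any_relocation_info (two 32-bit words).
struct RelocInfo { uint32_t r_word0, r_word1; };

// Same packing as makeRelocationInfo above: index in the top 24 bits, then
// pcrel (1 bit), log2 size (2 bits), extern (1 bit), type (4 bits).
static RelocInfo packRelocationInfo(uint32_t FixupOffset, uint32_t Index,
                                    unsigned IsPCRel, unsigned Log2Size,
                                    unsigned IsExtern, unsigned Type) {
  RelocInfo MRE;
  MRE.r_word0 = FixupOffset;
  MRE.r_word1 = (Index << 8) | (IsPCRel << 7) | (Log2Size << 5) |
                (IsExtern << 4) | (Type << 0);
  return MRE;
}

// The lo16/ha16 split used by the SECTDIFF cases: @l is the low halfword,
// @ha is the high halfword plus one when bit 15 of the low half is set,
// so that (ha << 16) + (int16_t)lo reconstructs the original value.
static uint16_t lo16(uint32_t V) { return V & 0xffff; }
static uint16_t ha16(uint32_t V) {
  return ((V >> 16) + ((V & 0x8000) ? 1 : 0)) & 0xffff;
}

int main() {
  // Arbitrary example values, not taken from a real object file.
  RelocInfo R = packRelocationInfo(/*FixupOffset=*/0x40, /*Index=*/3,
                                   /*IsPCRel=*/1, /*Log2Size=*/2,
                                   /*IsExtern=*/1, /*Type=*/0);
  assert(R.r_word1 == ((3u << 8) | (1u << 7) | (2u << 5) | (1u << 4)));

  uint32_t Addr = 0x1234ABCD;
  uint32_t Rebuilt = ((uint32_t)ha16(Addr) << 16) + (int16_t)lo16(Addr);
  assert(Rebuilt == Addr);
  return 0;
}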
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 806822c..54e3d40 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -57,6 +57,8 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
+def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true",
+ "Enable the fcpsgn instruction">;
def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
"Enable the fre instruction">;
def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
@@ -85,6 +87,13 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
+def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
+ "Enable VSX instructions">;
+
+def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
+ "Treat mftb as deprecated">;
+def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
+ "Treat vector data stream cache control instructions as deprecated">;
// Note: Future features to add when support is extended to more
// recent ISA levels:
@@ -146,10 +155,10 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE]>;
+ FeatureBookE, DeprecatedMFTB]>;
def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE]>;
+ FeatureBookE, DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
@@ -185,29 +194,32 @@ def : ProcessorModel<"g5", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
FeatureFRES, FeatureFRSQRTE,
- Feature64Bit /*, Feature64BitRegs */]>;
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
- FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+ FeatureSTFIWX, FeatureBookE, FeatureISEL,
+ DeprecatedMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+ FeatureSTFIWX, FeatureBookE, FeatureISEL,
+ DeprecatedMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
- /*, Feature64BitRegs */]>;
+ /*, Feature64BitRegs */, DeprecatedMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
- /*, Feature64BitRegs */, FeatureQPX]>;
+ /*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
@@ -220,32 +232,37 @@ def : ProcessorModel<"pwr5", G5Model,
[DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureSTFIWX, Feature64Bit]>;
+ FeatureSTFIWX, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr5x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, Feature64Bit]>;
+ FeatureFPRND, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", G5Model,
[DirectivePwr7, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */]>;
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bbfad87..ada34ed 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -23,6 +23,7 @@
#include "MCTargetDesc/PPCMCExpr.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "PPCTargetStreamer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -202,7 +203,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
.getGVStubEntry(SymToPrint);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage()) {
SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
@@ -212,12 +213,12 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
getHiddenGVStubEntry(SymToPrint);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else {
- SymToPrint = Mang->getSymbol(GV);
+ SymToPrint = getSymbol(GV);
}
} else {
- SymToPrint = Mang->getSymbol(GV);
+ SymToPrint = getSymbol(GV);
}
O << *SymToPrint;
@@ -363,7 +364,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
MCSymbol *MOSymbol = 0;
if (MO.isGlobal())
- MOSymbol = Mang->getSymbol(MO.getGlobal());
+ MOSymbol = getSymbol(MO.getGlobal());
else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
@@ -402,7 +403,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsCommon = GVar && RealGValue->hasCommonLinkage();
@@ -413,7 +414,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
+ if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -438,18 +440,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
- else if (MO.isCPI())
+ else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
+ if (TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
- RealGValue->hasAvailableExternallyLinkage())
+ RealGValue->hasAvailableExternallyLinkage() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
@@ -479,14 +485,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsFunction = !GVar;
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
- if (IsFunction || IsExternal)
+ if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -502,7 +508,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
OutContext);
@@ -520,7 +526,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
OutContext);
@@ -534,7 +540,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
OutContext);
@@ -550,7 +556,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
OutContext);
@@ -571,7 +577,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
OutContext);
@@ -586,7 +592,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
OutContext);
@@ -602,7 +608,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
OutContext);
@@ -623,7 +629,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
OutContext);
@@ -638,7 +644,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
@@ -654,7 +660,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
@@ -704,6 +710,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
break;
case PPC::LD:
case PPC::STD:
+ case PPC::LWA_32:
case PPC::LWA: {
// Verify alignment is legal, so we don't create relocations
// that can't be supported.
@@ -765,6 +772,9 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TD->getPointerSizeInBits() == 64;
+ PPCTargetStreamer &TS =
+ static_cast<PPCTargetStreamer &>(OutStreamer.getTargetStreamer());
+
if (isPPC64 && !TOC.empty()) {
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
@@ -775,7 +785,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
- OutStreamer.EmitTCEntry(*S);
+ TS.emitTCEntry(*S);
}
}
@@ -1051,7 +1061,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMIMacho.getGVStubEntry(NLPSym);
- StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
}
}
}
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 4e30c537..4224ae2 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -253,12 +253,19 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
case Intrinsic::sin:
case Intrinsic::cos:
return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
}
}
@@ -283,8 +290,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
default: return true;
case LibFunc::copysign:
case LibFunc::copysignf:
- case LibFunc::copysignl:
continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::copysignl:
+ return true;
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
@@ -309,6 +317,10 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
case LibFunc::rintf:
case LibFunc::rintl:
Opcode = ISD::FRINT; break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ Opcode = ISD::FROUND; break;
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index a584188..e8e7f4c 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -37,6 +37,37 @@ def RetCC_PPC : CallingConv<[
]>;
+// Note that we don't currently have calling conventions for 64-bit
+// PowerPC, but handle all the complexities of the ABI in the lowering
+// logic. FIXME: See if the logic can be simplified with use of CCs.
+// This may require some extensions to current table generation.
+
+// Simple calling convention for 64-bit ELF PowerPC fast isel.
+// Only handle ints and floats. All ints are promoted to i64.
+// Vector types and quadword ints are not handled.
+def CC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>
+]>;
+
+// Simple return-value convention for 64-bit ELF PowerPC fast isel.
+// All small ints are promoted to i64. Vector types, quadword ints,
+// and multiple register returns are "supported" to avoid compile
+// errors, but none are handled by the fast selector.
+def RetCC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 8cbf1fb..09117e7 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -37,6 +37,25 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+//===----------------------------------------------------------------------===//
+//
+// TBD:
+// FastLowerArguments: Handle simple cases.
+// PPCMaterializeGV: Handle TLS.
+// SelectCall: Handle function pointers.
+// SelectCall: Handle multi-register return values.
+// SelectCall: Optimize away nops for local calls.
+// processCallArgs: Handle bit-converted arguments.
+// finishCall: Handle multi-register return values.
+// PPCComputeAddress: Handle parameter references as FrameIndex's.
+// PPCEmitCmp: Handle immediate as operand 1.
+// SelectCall: Handle small byval arguments.
+// SelectIntrinsicCall: Implement.
+// SelectSelect: Implement.
+// Consider factoring isTypeLegal into the base class.
+// Implement switches and jump tables.
+//
+//===----------------------------------------------------------------------===//
using namespace llvm;
namespace {
@@ -52,7 +71,7 @@ typedef struct Address {
int FI;
} Base;
- int Offset;
+ long Offset;
// Innocuous defaults for our address.
Address()
@@ -89,15 +108,76 @@ class PPCFastISel : public FastISel {
virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
virtual bool FastLowerArguments();
+ virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectIToFP(const Instruction *I, bool IsSigned);
+ bool SelectFPToI(const Instruction *I, bool IsSigned);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectCall(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt, unsigned DestReg);
+ bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC, bool IsZExt = true,
+ unsigned FP64LoadOpc = PPC::LFD);
+ bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
+ bool PPCComputeAddress(const Value *Obj, Address &Addr);
+ void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg);
+ bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
unsigned PPCMaterializeInt(const Constant *C, MVT VT);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
const TargetRegisterClass *RC);
+ unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned);
+ unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
+
+ // Call handling routines.
+ private:
+ bool processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg);
+ void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg);
+ CCAssignFn *usePPC32CCs(unsigned Flag);
private:
#include "PPCGenFastISel.inc"
@@ -106,10 +186,1601 @@ class PPCFastISel : public FastISel {
} // end anonymous namespace
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
+ if (Flag == 1)
+ return CC_PPC32_SVR4;
+ else if (Flag == 2)
+ return CC_PPC32_SVR4_ByVal;
+ else if (Flag == 3)
+ return CC_PPC32_SVR4_VarArg;
+ else
+ return RetCC_PPC;
+}
+
+static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // These are not representable with any single compare.
+ case CmpInst::FCMP_FALSE:
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ default:
+ return Optional<PPC::Predicate>();
+
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::ICMP_EQ:
+ return PPC::PRED_EQ;
+
+ case CmpInst::FCMP_OGT:
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ return PPC::PRED_GT;
+
+ case CmpInst::FCMP_OGE:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ return PPC::PRED_GE;
+
+ case CmpInst::FCMP_OLT:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_SLT:
+ return PPC::PRED_LT;
+
+ case CmpInst::FCMP_OLE:
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ return PPC::PRED_LE;
+
+ case CmpInst::FCMP_ONE:
+ case CmpInst::ICMP_NE:
+ return PPC::PRED_NE;
+
+ case CmpInst::FCMP_ORD:
+ return PPC::PRED_NU;
+
+ case CmpInst::FCMP_UNO:
+ return PPC::PRED_UN;
+ }
+}
+
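For reference, an editor's illustration (not part of the patch) of why the unordered FP predicates above map to no single PPC::Predicate: a PowerPC fcmpu sets exactly one of four mutually exclusive CR bits (LT, GT, EQ, UN), and a conditional branch tests a single bit, so FCMP_OEQ is just the EQ bit while FCMP_UEQ would need "EQ or UN", i.e. two bits and two branches.

    #include <cmath>
    #include <cstdio>

    // Model of one CR field as written by fcmpu.
    struct CRField { bool LT, GT, EQ, UN; };

    static CRField fcmpu(double A, double B) {
      if (std::isnan(A) || std::isnan(B)) return {false, false, false, true};
      if (A < B) return {true, false, false, false};
      if (A > B) return {false, true, false, false};
      return {false, false, true, false};
    }

    int main() {
      CRField CR = fcmpu(1.0, NAN);
      bool OEQ = CR.EQ;            // one bit: representable, getComparePred gives PRED_EQ
      bool UEQ = CR.EQ || CR.UN;   // two bits: not representable, falls into the default case
      printf("OEQ=%d UEQ=%d\n", OEQ, UEQ);  // prints: OEQ=0 UEQ=1
      return 0;
    }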
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel, and return its equivalent machine type in VT.
+// FIXME: Copied directly from ARM -- factor into base class?
+bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT Evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (Evt == MVT::Other || !Evt.isSimple()) return false;
+ VT = Evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel as a load target, and return its equivalent machine type in VT.
+bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type that can be sign- or zero-extended to a basic operation,
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
+ return true;
+ }
+
+ return false;
+}
+
+// Given a value Obj, create an Address object Addr that represents its
+// address. Return false if we can't handle it.
+bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ // Look through bitcasts.
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ long TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
+ II != IE; ++II, ++GTI) {
+ const Value *Op = *II;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // FIXME: References to parameters fall through to the behavior
+ // below. They should be able to reference a frame index since
+ // they are stored to the stack, so we can get "ld rx, offset(r1)"
+ // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
+ // just contain the parameter. Try to handle this with a FI.
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0)
+ Addr.Base.Reg = getRegForValue(Obj);
+
+ // Prevent assignment of base register to X0, which is inappropriate
+ // for loads and stores alike.
+ if (Addr.Base.Reg != 0)
+ MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ return Addr.Base.Reg != 0;
+}
+
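As an aside, a minimal standalone sketch (not part of the patch; the field offset below is assumed for illustration, the real one comes from getStructLayout) of the constant-offset folding the GetElementPtr case performs, so a whole GEP chain can collapse into one displacement:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // gep %p, 0, 1, 3 on struct { i32 a; i64 b[8]; }:
      int64_t Offset = 0;
      Offset += 8;        // getElementOffset(1): field b (assumed layout)
      Offset += 3 * 8;    // constant array index 3 * getTypeAllocSize(i64)
      printf("folded displacement = %lld\n", (long long)Offset);  // prints: 32
      return 0;
    }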
+// Fix up some addresses that can't be used directly. For example, if
+// an offset won't fit in an instruction field, we may need to move it
+// into an index register.
+void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg) {
+
+ // Check whether the offset fits in the instruction field.
+ if (!isInt<16>(Addr.Offset))
+ UseOffset = false;
+
+ // If this is a frame-index base and the offset needs to be simplified, then
+ // put the alloca address into a register, set the base type back to
+ // register, and continue. This should almost never happen.
+ if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ if (!UseOffset) {
+ IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
+ : Type::getInt64Ty(*Context));
+ const ConstantInt *Offset =
+ ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
+ IndexReg = PPCMaterializeInt(Offset, MVT::i64);
+ assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
+ }
+}
+
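A small sketch (editor's illustration, not part of the patch) of the two displacement checks that drive this simplification and the load/store emission below: the offset must fit the signed 16-bit D field, and the DS-form instructions (ld, std, lwa) additionally require a 4-byte-aligned offset, otherwise the indexed (X-form) path is used.

    #include <cstdint>
    #include <cstdio>

    static bool fitsD(int64_t Off) {              // what isInt<16>(Addr.Offset) tests
      return Off >= -32768 && Off <= 32767;
    }
    static bool fitsDS(int64_t Off) {             // ld/std/lwa also need (Off & 3) == 0
      return fitsD(Off) && (Off & 3) == 0;
    }

    int main() {
      printf("%d %d %d\n", fitsD(32764), fitsDS(32766), fitsD(40000));
      // prints: 1 0 0 -- 32766 pushes LD onto the indexed form, 40000 pushes everything onto it
      return 0;
    }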
+// Emit a load instruction if possible, returning true if we succeeded,
+// otherwise false. See commentary below for how the register class of
+// the load is determined.
+bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC,
+ bool IsZExt, unsigned FP64LoadOpc) {
+ unsigned Opc;
+ bool UseOffset = true;
+
+ // If ResultReg is given, it determines the register class of the load.
+ // Otherwise, RC is the register class to use. If the result of the
+ // load isn't anticipated in this block, both may be zero, in which
+ // case we must make a conservative guess. In particular, don't assign
+ // R0 or X0 to the result register, as the result may be used in a load,
+ // store, add-immediate, or isel that won't permit this. (Though
+ // perhaps the spill and reload of live-exit values would handle this?)
+ const TargetRegisterClass *UseRC =
+ (ResultReg ? MRI.getRegClass(ResultReg) :
+ (RC ? RC :
+ (VT == MVT::f64 ? &PPC::F8RCRegClass :
+ (VT == MVT::f32 ? &PPC::F4RCRegClass :
+ (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass)))));
+
+ bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
+ break;
+ case MVT::i16:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
+ (Is32BitInt ? PPC::LHA : PPC::LHA8));
+ break;
+ case MVT::i32:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
+ (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
+ if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
+ UseOffset = false;
+ break;
+ case MVT::i64:
+ Opc = PPC::LD;
+ assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
+ "64-bit load with 32-bit target??");
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::LFS;
+ break;
+ case MVT::f64:
+ Opc = FP64LoadOpc;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+ if (ResultReg == 0)
+ ResultReg = createResultReg(UseRC);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset) {
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ } else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::LBZ: Opc = PPC::LBZX; break;
+ case PPC::LBZ8: Opc = PPC::LBZX8; break;
+ case PPC::LHZ: Opc = PPC::LHZX; break;
+ case PPC::LHZ8: Opc = PPC::LHZX8; break;
+ case PPC::LHA: Opc = PPC::LHAX; break;
+ case PPC::LHA8: Opc = PPC::LHAX8; break;
+ case PPC::LWZ: Opc = PPC::LWZX; break;
+ case PPC::LWZ8: Opc = PPC::LWZX8; break;
+ case PPC::LWA: Opc = PPC::LWAX; break;
+ case PPC::LWA_32: Opc = PPC::LWAX_32; break;
+ case PPC::LD: Opc = PPC::LDX; break;
+ case PPC::LFS: Opc = PPC::LFSX; break;
+ case PPC::LFD: Opc = PPC::LFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select a load instruction.
+bool PPCFastISel::SelectLoad(const Instruction *I) {
+ // FIXME: No atomic loads are supported.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(0), Addr))
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. This is necessary
+ // to constrain RA from using R0/X0 when this is not legal.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
+ return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Emit a store instruction to store SrcReg at Addr.
+bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
+ assert(SrcReg && "Nothing to store!");
+ unsigned Opc;
+ bool UseOffset = true;
+
+ const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
+ bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::STB : PPC::STB8;
+ break;
+ case MVT::i16:
+ Opc = Is32BitInt ? PPC::STH : PPC::STH8;
+ break;
+ case MVT::i32:
+ assert(Is32BitInt && "Not GPRC for i32??");
+ Opc = PPC::STW;
+ break;
+ case MVT::i64:
+ Opc = PPC::STD;
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::STFS;
+ break;
+ case MVT::f64:
+ Opc = PPC::STFD;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg)
+ .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::STB: Opc = PPC::STBX; break;
+ case PPC::STH : Opc = PPC::STHX; break;
+ case PPC::STW : Opc = PPC::STWX; break;
+ case PPC::STB8: Opc = PPC::STBX8; break;
+ case PPC::STH8: Opc = PPC::STHX8; break;
+ case PPC::STW8: Opc = PPC::STWX8; break;
+ case PPC::STD: Opc = PPC::STDX; break;
+ case PPC::STFS: Opc = PPC::STFSX; break;
+ case PPC::STFD: Opc = PPC::STFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select a store instruction.
+bool PPCFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // FIXME: No atomic stores are supported.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(Op0->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!PPCEmitStore(VT, SrcReg, Addr))
+ return false;
+
+ return true;
+}
+
+// Attempt to fast-select a branch instruction.
+bool PPCFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *BrBB = FuncInfo.MBB;
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // For now, just try the simplest case where it's fed by a compare.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+ if (!OptPPCPred)
+ return false;
+
+ PPC::Predicate PPCPred = OptPPCPred.getValue();
+
+ // Take advantage of fall-through opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ PPCPred = PPC::InvertPredicate(PPCPred);
+ }
+
+ unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+
+ if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CondReg))
+ return false;
+
+ BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
+ .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DL);
+ return true;
+ }
+
+ // FIXME: ARM looks for a case where the block containing the compare
+ // has been split from the block containing the branch. If this happens,
+ // there is a vreg available containing the result of the compare. I'm
+ // not sure we can do much, as we've lost the predicate information with
+ // the compare instruction -- we have a 4-bit CR but don't know which bit
+ // to test here.
+ return false;
+}
+
+// Attempt to emit a compare of the two source values. Signed and unsigned
+// comparisons are supported. Return false if we can't handle it.
+bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
+ bool IsZExt, unsigned DestReg) {
+ Type *Ty = SrcValue1->getType();
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ // See if operand 2 is an immediate encodeable in the compare.
+ // FIXME: Operands are not in canonical order at -O0, so an immediate
+ // operand in position 1 is a lost opportunity for now. We are
+ // similar to ARM in this regard.
+ long Imm = 0;
+ bool UseImm = false;
+
+ // Only 16-bit integer constants can be represented in compares for
+ // PowerPC. Others will be materialized into a register.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
+ if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i8 || SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
+ if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
+ UseImm = true;
+ }
+ }
+
+ unsigned CmpOpc;
+ bool NeedsExt = false;
+ switch (SrcVT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ CmpOpc = PPC::FCMPUS;
+ break;
+ case MVT::f64:
+ CmpOpc = PPC::FCMPUD;
+ break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ NeedsExt = true;
+ // Intentional fall-through.
+ case MVT::i32:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
+ break;
+ case MVT::i64:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
+ if (NeedsExt) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg1 = ExtReg;
+
+ if (!UseImm) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg2 = ExtReg;
+ }
+ }
+
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addImm(Imm);
+
+ return true;
+}
+
+// Attempt to fast-select a floating-point extend instruction.
+bool PPCFastISel::SelectFPExt(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f32 || DestVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // No code is generated for a FP extend.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point truncate instruction.
+bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f64 || DestVT != MVT::f32)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // Round the result to single precision.
+ unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg)
+ .addReg(SrcReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
+// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
+// case to 8 bytes which produces tighter code but wastes stack space.
+unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
+ bool IsSigned) {
+
+ // If necessary, extend 32-bit int to 64-bit.
+ if (SrcVT == MVT::i32) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return 0;
+ SrcReg = TmpReg;
+ }
+
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the GPR.
+ if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
+ return 0;
+
+ // Load the integer value into an FPR. The kind of load used depends
+ // on a number of conditions.
+ unsigned LoadOpc = PPC::LFD;
+
+ if (SrcVT == MVT::i32) {
+ Addr.Offset = 4;
+ if (!IsSigned)
+ LoadOpc = PPC::LFIWZX;
+ else if (PPCSubTarget.hasLFIWAX())
+ LoadOpc = PPC::LFIWAX;
+ }
+
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
+ return 0;
+
+ return ResultReg;
+}
+
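An editor's illustration (not part of the patch) of why the i32 case above reloads from Addr.Offset = 4: this code targets big-endian PPC64, so the 64-bit store places the 32 interesting bits in bytes 4..7 of the 8-byte slot, and the 4-byte LFIWZX/LFIWAX reload must start 4 bytes in.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Val = 0x12345678;           // an i32 that was extended to i64
      unsigned char Slot[8];
      for (int i = 0; i < 8; ++i)          // model a big-endian 8-byte store
        Slot[i] = (unsigned char)(Val >> (56 - 8 * i));

      uint32_t Lo = 0;
      for (int i = 4; i < 8; ++i)          // 4-byte reload at offset 4
        Lo = (Lo << 8) | Slot[i];
      printf("reloaded 0x%08x\n", Lo);     // prints: reloaded 0x12345678
      return 0;
    }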
+// Attempt to fast-select an integer-to-floating-point conversion.
+bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
+ MVT DstVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::f32 && DstVT != MVT::f64)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
+ SrcVT != MVT::i32 && SrcVT != MVT::i64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // We can only lower an unsigned convert if we have the newer
+ // floating-point conversion operations.
+ if (!IsSigned && !PPCSubTarget.hasFPCVT())
+ return false;
+
+ // FIXME: For now we require the newer floating-point conversion operations
+ // (which are present only on P7 and A2 server models) when converting
+ // to single-precision float. Otherwise we have to generate a lot of
+ // fiddly code to avoid double rounding. If necessary, the fiddly code
+ // can be found in PPCTargetLowering::LowerINT_TO_FP().
+ if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ return false;
+
+ // Extend the input if necessary.
+ if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return false;
+ SrcVT = MVT::i64;
+ SrcReg = TmpReg;
+ }
+
+ // Move the integer value to an FPR.
+ unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
+ if (FPReg == 0)
+ return false;
+
+ // Determine the opcode for the conversion.
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned Opc;
+
+ if (DstVT == MVT::f32)
+ Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
+ else
+ Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(FPReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// Move the floating-point value in SrcReg into an integer destination
+// register, and return the register (or zero if we can't handle it).
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned) {
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ // Note that if we have STFIWX available, we could use a 4-byte stack
+ // slot for i32, but this being fast-isel we'll just go with the
+ // easiest code gen possible.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the FPR.
+ if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
+ return 0;
+
+ // Reload it into a GPR. If we want an i32, modify the address
+ // to have a 4-byte offset so we load from the right place.
+ if (VT == MVT::i32)
+ Addr.Offset = 4;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
+ return 0;
+
+ return ResultReg;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
+ MVT DstVT, SrcVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::i32 && DstVT != MVT::i64)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+ if (!isTypeLegal(SrcTy, SrcVT))
+ return false;
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // Convert f32 to f64 if necessary. This is just a meaningless copy
+ // to get the register class right. COPY_TO_REGCLASS is needed since
+ // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
+ const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
+ if (InRC == &PPC::F4RCRegClass) {
+ unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
+ .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
+ SrcReg = TmpReg;
+ }
+
+ // Determine the opcode for the conversion, which takes place
+ // entirely within FPRs.
+ unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+ unsigned Opc;
+
+ if (DstVT == MVT::i32)
+ if (IsSigned)
+ Opc = PPC::FCTIWZ;
+ else
+ Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ else
+ Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Now move the integer value from a float register to an integer register.
+ unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+ if (IntReg == 0)
+ return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+// Attempt to fast-select a binary integer operation that isn't already
+// handled automatically.
+bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. If there is no register,
+ // make a conservative choice (don't assign R0).
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ &PPC::GPRC_and_GPRC_NOR0RegClass);
+ bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
+ break;
+ case ISD::OR:
+ Opc = IsGPRC ? PPC::OR : PPC::OR8;
+ break;
+ case ISD::SUB:
+ Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
+ break;
+ }
+
+ unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // Handle case of small immediate operand.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ const APInt &CIVal = ConstInt->getValue();
+ int Imm = (int)CIVal.getSExtValue();
+ bool UseImm = true;
+ if (isInt<16>(Imm)) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Missing case!");
+ case PPC::ADD4:
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ break;
+ case PPC::ADD8:
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ break;
+ case PPC::OR:
+ Opc = PPC::ORI;
+ break;
+ case PPC::OR8:
+ Opc = PPC::ORI8;
+ break;
+ case PPC::SUBF:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ case PPC::SUBF8:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ }
+
+ if (UseImm) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addImm(Imm);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ // Reg-reg case.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ // Reverse operands for subtract-from.
+ if (ISDOpcode == ISD::SUB)
+ std::swap(SrcReg1, SrcReg2);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
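For illustration only (not part of the patch): the immediate path above rewrites "sub x, C" as "addi x, -C" (SUBF itself computes RB - RA, hence the operand swap in the reg-reg path), and the one value it refuses to fold is C == -32768, because -C no longer fits the signed 16-bit ADDI field.

    #include <cstdint>
    #include <cstdio>

    static bool fitsSImm16(int32_t V) { return V >= -32768 && V <= 32767; }

    int main() {
      printf("sub of -32767: -C = %d, fits ADDI imm = %d\n", 32767, fitsSImm16(32767));
      printf("sub of -32768: -C = %d, fits ADDI imm = %d\n", 32768, fitsSImm16(32768));
      // prints: ... fits ADDI imm = 1   /   ... fits ADDI imm = 0
      return 0;
    }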
+// Handle arguments to a call that we're attempting to fast-select.
+// Return false if the arguments are too complex for us at the moment.
+bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
+
+ // Bail out if we can't handle any of the arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Skip vector arguments for now, as well as long double and
+ // uint128_t, and anything that isn't passed in a register.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
+ !VA.isRegLoc() || VA.needsCustom())
+ return false;
+
+ // Skip bit-converted arguments for now.
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ return false;
+ }
+
+ // Get a count of how many bytes are to be pushed onto the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TII.getCallFrameSetupOpcode()))
+ .addImm(NumBytes);
+
+ // Prepare to assign register arguments. Every argument uses up a
+ // GPR protocol register even if it's passed in a floating-point
+ // register.
+ unsigned NextGPR = PPC::X3;
+ unsigned NextFPR = PPC::F1;
+
+ // Process arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Handle argument promotion and bitcasts.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
+ llvm_unreachable("Failed to emit a sext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
+ llvm_unreachable("Failed to emit a zext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::BCvt: {
+ // FIXME: Not yet handled.
+ llvm_unreachable("Should have bailed before getting here!");
+ break;
+ }
+ }
+
+ // Copy this argument to the appropriate register.
+ unsigned ArgReg;
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
+ ArgReg = NextFPR++;
+ ++NextGPR;
+ } else
+ ArgReg = NextGPR++;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ArgReg).addReg(Arg);
+ RegArgs.push_back(ArgReg);
+ }
+
+ return true;
+}
+
+// For a call that we've determined we can fast-select, finish the
+// call sequence and generate a copy to obtain the return value (if any).
+void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg) {
+ // Issue CALLSEQ_END.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TII.getCallFrameDestroyOpcode()))
+ .addImm(NumBytes).addImm(0);
+
+ // Next, generate a copy to obtain the return value.
+ // FIXME: No multi-register return values yet, though I don't foresee
+ // any real difficulties there.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ CCValAssign &VA = RVLocs[0];
+ assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ MVT DestVT = VA.getValVT();
+ MVT CopyVT = DestVT;
+
+ // Ints smaller than a register still arrive in a full 64-bit
+ // register, so make sure we recognize this.
+ if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
+ CopyVT = MVT::i64;
+
+ unsigned SourcePhysReg = VA.getLocReg();
+ unsigned ResultReg = 0;
+
+ if (RetVT == CopyVT) {
+ const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
+ ResultReg = createResultReg(CpyRC);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+
+ // If necessary, round the floating result to single precision.
+ } else if (CopyVT == MVT::f64) {
+ ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
+ ResultReg).addReg(SourcePhysReg);
+
+ // If only the low half of a general register is needed, generate
+ // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
+ // used along the fast-isel path (not lowered), and downstream logic
+ // also doesn't like a direct subreg copy on a physical reg.)
+ } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
+ ResultReg = createResultReg(&PPC::GPRCRegClass);
+ // Convert physical register from G8RC to GPRC.
+ SourcePhysReg -= PPC::X0 - PPC::R0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+ }
+
+ assert(ResultReg && "ResultReg unset!");
+ UsedRegs.push_back(SourcePhysReg);
+ UpdateValueMap(I, ResultReg);
+ }
+}
+
+// Attempt to fast-select a call instruction.
+bool PPCFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall())
+ return false;
+
+ // Obtain calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool IsVarArg = FTy->isVarArg();
+
+ // Not ready for varargs yet.
+ if (IsVarArg)
+ return false;
+
+ // Handle simple calls for now, with legal return types and
+ // those that can be extended.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8)
+ return false;
+
+ // FIXME: No multi-register return values yet.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
+ RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
+ RetVT != MVT::f64) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ if (RVLocs.size() > 1)
+ return false;
+ }
+
+ // Bail early if more than 8 arguments, as we only currently
+ // handle arguments passed in registers.
+ unsigned NumArgs = CS.arg_size();
+ if (NumArgs > 8)
+ return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+
+ Args.reserve(NumArgs);
+ ArgRegs.reserve(NumArgs);
+ ArgVTs.reserve(NumArgs);
+ ArgFlags.reserve(NumArgs);
+
+ for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
+ II != IE; ++II) {
+ // FIXME: ARM does something for intrinsic calls here, check into that.
+
+ unsigned AttrIdx = II - CS.arg_begin() + 1;
+
+ // Only handle easy calls for now. It would be reasonably easy
+ // to handle <= 8-byte structures passed ByVal in registers, but we
+ // have to ensure they are right-justified in the register.
+ if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
+ CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
+ CS.paramHasAttr(AttrIdx, Attribute::ByVal))
+ return false;
+
+ ISD::ArgFlagsTy Flags;
+ if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
+ Flags.setZExt();
+
+ Type *ArgTy = (*II)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
+ return false;
+
+ if (ArgVT.isVector())
+ return false;
+
+ unsigned Arg = getRegForValue(*II);
+ if (Arg == 0)
+ return false;
+
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*II);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Process the arguments.
+ SmallVector<unsigned, 8> RegArgs;
+ unsigned NumBytes;
+
+ if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, IsVarArg))
+ return false;
+
+ // FIXME: No handling for function pointers yet. This requires
+ // implementing the function descriptor (OPD) setup.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV)
+ return false;
+
+ // Build direct call with NOP for TOC restore.
+ // FIXME: We can and should optimize away the NOP for local calls.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::BL8_NOP));
+ // Add callee.
+ MIB.addGlobalAddress(GV);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
+ MIB.addReg(RegArgs[II], RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers. Proper
+ // defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+// Attempt to fast-select a return instruction.
+bool PPCFastISel::SelectRet(const Instruction *I) {
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+ CallingConv::ID CC = F.getCallingConv();
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
+ const Value *RV = Ret->getOperand(0);
+
+ // FIXME: Only one output register for now.
+ if (ValLocs.size() > 1)
+ return false;
+
+ // Special case for returning a constant integer of any size.
+ // Materialize the constant as an i64 and copy it to the return
+ // register. This avoids an unnecessary extend or truncate.
+ if (isa<ConstantInt>(*RV)) {
+ const Constant *C = cast<Constant>(RV);
+ unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
+ unsigned RetReg = ValLocs[0].getLocReg();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ RetReg).addReg(SrcReg);
+ RetRegs.push_back(RetReg);
+
+ } else {
+ unsigned Reg = getRegForValue(RV);
+
+ if (Reg == 0)
+ return false;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i < ValLocs.size(); ++i) {
+
+ CCValAssign &VA = ValLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ RetRegs.push_back(VA.getLocReg());
+ unsigned SrcReg = Reg + VA.getValNo();
+
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple())
+ return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getLocVT();
+
+ if (RVVT != DestVT && RVVT != MVT::i8 &&
+ RVVT != MVT::i16 && RVVT != MVT::i32)
+ return false;
+
+ if (RVVT != DestVT) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ llvm_unreachable("Full value assign but types don't match?");
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ case CCValAssign::SExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ }
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), RetRegs[i])
+ .addReg(SrcReg);
+ }
+ }
+ }
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::BLR));
+
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+
+ return true;
+}
+
+// Attempt to emit an integer extend of SrcReg into DestReg. Both
+// signed and zero extensions are supported. Return false if we
+// can't handle it.
+bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i64)
+ return false;
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
+ return false;
+
+ // Signed extensions use EXTSB, EXTSH, EXTSW.
+ if (!IsZExt) {
+ unsigned Opc;
+ if (SrcVT == MVT::i8)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
+ else if (SrcVT == MVT::i16)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
+ else {
+ assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
+ Opc = PPC::EXTSW_32_64;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Unsigned 32-bit extensions use RLWINM.
+ } else if (DestVT == MVT::i32) {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 24;
+ else {
+ assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
+ MB = 16;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
+ DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
+
+ // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
+ } else {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 56;
+ else if (SrcVT == MVT::i16)
+ MB = 48;
+ else
+ MB = 32;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::RLDICL_32_64), DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
+ }
+
+ return true;
+}
+
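An editor's illustration (not part of the patch) of the masks selected by the MB immediates above: with SH = 0, RLWINM keeps bits MB..31 in IBM (MSB = bit 0) numbering, i.e. the low 32 - MB bits, and RLDICL keeps the low 64 - MB bits, which is exactly a zero extension from i8/i16/i32.

    #include <cstdint>
    #include <cstdio>

    static uint32_t rlwinmMask(unsigned MB) { return 0xFFFFFFFFu >> MB; }  // SH = 0, ME = 31
    static uint64_t rldiclMask(unsigned MB) { return ~0ULL >> MB; }        // SH = 0

    int main() {
      printf("RLWINM MB=24 -> 0x%08x (zext i8 to i32)\n", rlwinmMask(24));   // 0x000000ff
      printf("RLWINM MB=16 -> 0x%08x (zext i16 to i32)\n", rlwinmMask(16));  // 0x0000ffff
      printf("RLDICL MB=56 -> 0x%016llx (zext i8 to i64)\n",
             (unsigned long long)rldiclMask(56));                            // 0x00000000000000ff
      printf("RLDICL MB=32 -> 0x%016llx (zext i32 to i64)\n",
             (unsigned long long)rldiclMask(32));                            // 0x00000000ffffffff
      return 0;
    }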
+// Attempt to fast-select an indirect branch instruction.
+bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0)
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
+ .addReg(AddrReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+
+ return true;
+}
+
+// Attempt to fast-select an integer truncate instruction.
+bool PPCFastISel::SelectTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
+ return false;
+
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // The only interesting case is when we need to switch register classes.
+ if (SrcVT == MVT::i64) {
+ unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(SrcReg, 0, PPC::sub_32);
+ SrcReg = ResultReg;
+ }
+
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select an integer extend instruction.
+bool PPCFastISel::SelectIntExt(const Instruction *I) {
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool IsZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ if (!DestEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+
+ // If we know the register class needed for the result of this
+ // instruction, use it. Otherwise pick the register class of the
+ // correct size that does not contain X0/R0, since we don't know
+ // whether downstream uses permit that assignment.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass));
+ unsigned ResultReg = createResultReg(RC);
+
+ if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
// Attempt to fast-select an instruction that wasn't handled by
-// the table-generated machinery. TBD.
+// the table-generated machinery.
bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
- return I && false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectIToFP(I, /*IsSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*IsSigned*/ false);
+ case Instruction::FPToSI:
+ return SelectFPToI(I, /*IsSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*IsSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
+ case Instruction::Call:
+ if (dyn_cast<IntrinsicInst>(I))
+ return false;
+ return SelectCall(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntExt(I);
+ // Here add other flavors of Instruction::XXX that automated
+ // cases don't catch. For example, switches are terminators
+ // that aren't yet handled.
+ default:
+ break;
+ }
+ return false;
}
// Materialize a floating-point constant into a register, and return
@@ -131,21 +1802,94 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
(VT == MVT::f32) ? 4 : 8, Align);
- // For small code model, generate a LDtocCPT.
- if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+ unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
+ unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
- DestReg)
- .addConstantPoolIndex(Idx).addReg(PPC::X2).addMemOperand(MMO);
- else {
+ TmpReg)
+ .addConstantPoolIndex(Idx).addReg(PPC::X2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg).addMemOperand(MMO);
+ } else {
// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
- unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
- unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
- .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
- .addReg(TmpReg)
- .addMemOperand(MMO);
+ // But for large code model, we must generate a LDtocL followed
+ // by the LF[SD].
+ if (CModel == CodeModel::Large) {
+ unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg2);
+ } else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
+ .addReg(TmpReg)
+ .addMemOperand(MMO);
+ }
+
+ return DestReg;
+}
+
+// Materialize the address of a global value into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
+ assert(VT == MVT::i64 && "Non-address!");
+ const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
+ unsigned DestReg = createResultReg(RC);
+
+ // Global values may be plain old object addresses, TLS object
+ // addresses, constant pool entries, or jump tables. How we generate
+ // code for these may depend on small, medium, or large code model.
+ CodeModel::Model CModel = TM.getCodeModel();
+
+ // FIXME: Jump tables are not yet required because fast-isel doesn't
+ // handle switches; if that changes, we need them as well. For now,
+ // what follows assumes everything's a generic (or TLS) global address.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias, use the aliasee for determining thread-locality.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ // FIXME: We don't yet handle the complexity of TLS.
+ bool IsTLS = GVar && GVar->isThreadLocal();
+ if (IsTLS)
+ return 0;
+
+ // For small code model, generate a simple TOC load.
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg)
+ .addGlobalAddress(GV).addReg(PPC::X2);
+ else {
+ // If the address is an externally defined symbol, a symbol with
+ // common or externally available linkage, a function address, or a
+ // jump table address (not yet needed), or if we are generating code
+ // for large code model, we generate:
+ // LDtocL(GV, ADDIStocHA(%X2, GV))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, GV), GV)
+ // Either way, start with the ADDIStocHA:
+ unsigned HighPartReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+ HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
+
+ // !GVar implies a function address. An external variable is one
+ // without an initializer.
+ // If/when switches are implemented, jump tables should be handled
+ // on the "if" path here.
+ if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
+ GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ DestReg).addGlobalAddress(GV).addReg(HighPartReg);
+ else
+ // Otherwise generate the ADDItocL.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL),
+ DestReg).addReg(HighPartReg).addGlobalAddress(GV);
}
return DestReg;
@@ -283,23 +2027,112 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return PPCMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return PPCMaterializeGV(GV, VT);
else if (isa<ConstantInt>(C))
return PPCMaterializeInt(C, VT);
- // TBD: Global values.
return 0;
}
// Materialize the address created by an alloca into a register, and
-// return the register number (or zero if we failed to handle it). TBD.
+// return the register number (or zero if we failed to handle it).
unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
- return AI && 0;
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(SI->second).addImm(0);
+ return ResultReg;
+ }
+
+ return 0;
}
-// Fold loads into extends when possible. TBD.
+// Fold loads into extends when possible.
+// FIXME: We can have multiple redundant extend/trunc instructions
+// following a load. The folding only picks up one. Extend this
+// to check subsequent instructions for the same pattern and remove
+// them. Thus ResultReg should be the def reg for the last redundant
+// instruction in a chain, and all intervening instructions can be
+// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
+// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
- return MI && OpNo && LI && false;
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ bool IsZExt = false;
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+
+ case PPC::RLDICL:
+ case PPC::RLDICL_32_64: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 56) ||
+ (VT == MVT::i16 && MB <= 48) ||
+ (VT == MVT::i32 && MB <= 32))
+ break;
+ return false;
+ }
+
+ case PPC::RLWINM:
+ case PPC::RLWINM8: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 24) ||
+ (VT == MVT::i16 && MB <= 16))
+ break;
+ return false;
+ }
+
+ case PPC::EXTSB:
+ case PPC::EXTSB8:
+ case PPC::EXTSB8_32_64:
+ /* There is no sign-extending load-byte instruction. */
+ return false;
+
+ case PPC::EXTSH:
+ case PPC::EXTSH8:
+ case PPC::EXTSH8_32_64: {
+ if (VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64: {
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+ }
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(LI->getOperand(0), Addr))
+ return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+
+ if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt))
+ return false;
+
+ MI->eraseFromParent();
+ return true;
}
// Attempt to lower call arguments in a faster way than done by
@@ -312,6 +2145,81 @@ bool PPCFastISel::FastLowerArguments() {
return false;
}
+// Handle materializing integer constants into a register. This is not
+// automatically generated for PowerPC, so must be explicitly created here.
+unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+
+ if (Opc != ISD::Constant)
+ return 0;
+
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+ &PPC::GPRCRegClass);
+ if (VT == MVT::i64)
+ return PPCMaterialize64BitInt(Imm, RC);
+ else
+ return PPCMaterialize32BitInt(Imm, RC);
+}
+
+// Override for ADDI and ADDI8 to set the correct register class
+// on RHS operand 0. The automatic infrastructure naively assumes
+// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
+// for these cases. At the moment, none of the other automatically
+// generated RI instructions require special treatment. However, once
+// SelectSelect is implemented, "isel" requires similar handling.
+//
+// Also be conservative about the output register class. Avoid
+// assigning R0 or X0 to the output register for GPRC and G8RC
+// register classes, as any such result could be used in ADDI, etc.,
+// where those regs have another meaning.
+unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ if (MachineInstOpcode == PPC::ADDI)
+ MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ else if (MachineInstOpcode == PPC::ADDI8)
+ MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
+ Op0, Op0IsKill, Imm);
+}
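
[Editor's sketch] The reason ADDI/ADDI8 need the NOR0/NOX0 register classes is the PowerPC convention that an RA field of r0 in these instructions reads as the literal value 0 rather than the register contents. A small illustrative model of that quirk (hypothetical code, not part of the patch):

#include <cassert>
#include <cstdint>

// Hypothetical model of "addi rD, rA, imm": when the RA field encodes r0,
// the hardware substitutes the constant 0 for the register value.
static int64_t addiResult(unsigned ra, int64_t raValue, int64_t imm) {
  return (ra == 0 ? 0 : raValue) + imm;
}

int main() {
  assert(addiResult(5, 100, 8) == 108); // addi r3, r5, 8 with r5 = 100
  // With r0 = 100 the result is still just the immediate, which is why
  // fast-isel must never hand r0/x0 to this operand.
  assert(addiResult(0, 100, 8) == 8);
  return 0;
}
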
+
+// Override for instructions with one register operand to avoid use of
+// R0/X0. The automatic infrastructure isn't aware of the context so
+// we must be conservative.
+unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+}
+
+// Override for instructions with two register operands to avoid use
+// of R0/X0. The automatic infrastructure isn't aware of the context
+// so we must be conservative.
+unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+ Op1, Op1IsKill);
+}
+
namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 24d3a0b..0ac2ced 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -204,10 +204,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned FrameSize =
UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned TargetAlign = getStackAlignment();
- unsigned MaxAlign = MFI->getMaxAlignment();
+ // Get stack alignments. The frame must be aligned to the greatest of these:
+ unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
+ unsigned MaxAlign = MFI->getMaxAlignment(); // max alignment of frame data
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
const PPCRegisterInfo *RegInfo =
@@ -346,12 +345,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
bool needsFrameMoves = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get the ABI.
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ assert((isDarwinABI || isSVR4ABI) &&
+ "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
+
// Prepare for frame info.
MCSymbol *FrameLabel = 0;
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
- if (!Subtarget.isSVR4ABI())
+ if (!isSVR4ABI)
for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
HandleVRSaveUpdate(MBBI, TII);
@@ -371,23 +378,52 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MFI->isFrameAddressTaken())
replaceFPWithRealFP(MF);
- // Get processor type.
- bool isPPC64 = Subtarget.isPPC64();
- // Get operating system
- bool isDarwinABI = Subtarget.isDarwinABI();
// Check if the link register (LR) must be saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
- // Do we have a frame pointer for this function?
+ // Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
+ const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
+ : PPC::MFLR );
+ const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
+ : PPC::STW );
+ const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
+ : PPC::STWU );
+ const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
+ : PPC::STWUX);
+ const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
+ : PPC::OR );
+ const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
+ : PPC::SUBFC);
+ const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
+ : PPC::SUBFIC);
+
+ // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
+ // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
+ // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
+ // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
+ assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
+ "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
+
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
assert(FPIndex && "No Frame Pointer Save Slot!");
@@ -399,7 +435,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
int BPOffset = 0;
if (HasBP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
@@ -410,181 +446,116 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
- if (isPPC64) {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+ // Get stack alignments.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ if (HasBP && MaxAlign > 1)
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+
+ // Frames larger than 32KB require special handling because they cannot be
+ // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
+ bool isLargeFrame = !isInt<16>(NegFrameSize);
- if (!MustSaveCRs.empty()) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
- }
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X31)
- .addImm(FPOffset)
- .addReg(PPC::X1);
-
- if (HasBP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X30)
- .addImm(BPOffset)
- .addReg(PPC::X1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X0)
- .addImm(LROffset)
- .addReg(PPC::X1);
-
- if (!MustSaveCRs.empty())
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
- .addReg(PPC::X12, getKillRegState(true))
- .addImm(8)
- .addReg(PPC::X1);
- } else {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
-
- if (HasFP)
- // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
- // offsets of R1 is not allowed.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R31)
- .addImm(FPOffset)
- .addReg(PPC::R1);
-
- if (HasBP)
- // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
- // offsets of R1 is not allowed.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R30)
- .addImm(BPOffset)
- .addReg(PPC::R1);
-
- assert(MustSaveCRs.empty() &&
- "Prologue CR saving supported only in 64-bit mode");
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R0)
- .addImm(LROffset)
- .addReg(PPC::R1);
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Prologue CR saving supported only in 64-bit mode");
+
+ if (!MustSaveCRs.empty()) { // will only occur for PPC64
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
}
- // Skip if a leaf routine.
+ if (HasFP)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
+
+ if (HasBP)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
+
+ if (MustSaveLR)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ .addReg(TempReg, getKillRegState(true))
+ .addImm(8)
+ .addReg(SPReg);
+
+ // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
if (!FrameSize) return;
- // Get stack alignments.
- unsigned MaxAlign = MFI->getMaxAlignment();
-
// Adjust stack pointer: r1 += NegFrameSize.
// If there is a preferred stack alignment, align R1 now
- if (!isPPC64) {
- // PPC32.
-
- if (HasBP) {
- // Save a copy of r1 as the base pointer.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R30)
- .addReg(PPC::R1)
- .addReg(PPC::R1);
- }
- if (HasBP && MaxAlign > 1) {
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
+ if (HasBP) {
+ // Save a copy of r1 as the base pointer.
+ BuildMI(MBB, MBBI, dl, OrInst, BPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
+ }
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R1)
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(32 - Log2_32(MaxAlign))
.addImm(31);
- if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(NegFrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R12)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R12)
- .addReg(PPC::R12, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addReg(PPC::R12, RegState::Kill);
- }
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
- .addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1)
- .addReg(PPC::R0);
- } else if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
- .addReg(PPC::R1)
- .addImm(NegFrameSize)
- .addReg(PPC::R1);
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
.addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
- .addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1)
- .addReg(PPC::R0);
- }
- } else { // PPC64.
- if (HasBP) {
- // Save a copy of r1 as the base pointer.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X30)
- .addReg(PPC::X1)
- .addReg(PPC::X1);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
}
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
- if (HasBP && MaxAlign > 1) {
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
- .addReg(PPC::X1)
- .addImm(0)
- .addImm(64 - Log2_32(MaxAlign));
- if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addImm(NegFrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X12)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X12)
- .addReg(PPC::X12, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X12, RegState::Kill);
- }
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(PPC::X0);
- } else if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
- .addReg(PPC::X1)
- .addImm(NegFrameSize)
- .addReg(PPC::X1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(PPC::X0);
- }
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
}
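
[Editor's sketch] The isLargeFrame paths above split the (negative) frame size into a LIS/ORI pair because the store-with-update instructions only accept a small signed immediate displacement (the isInt<16> test). The split and the range test can be checked in isolation (hypothetical helper names, not part of the patch; arithmetic right shift of a negative value assumed, as on typical targets):

#include <cassert>
#include <cstdint>

// Signed 16-bit range test, as isInt<16>() performs it.
static bool fitsInt16(int32_t v) { return v >= -32768 && v <= 32767; }

int main() {
  int32_t NegFrameSize = -40000;          // frame too large for a plain STDU/STWU
  assert(!fitsInt16(NegFrameSize));

  // LIS loads (hi << 16); ORI ors in the low 16 bits; together they rebuild
  // the original 32-bit value in the scratch register.
  int32_t hi = NegFrameSize >> 16;
  uint32_t lo = NegFrameSize & 0xFFFF;
  uint32_t rebuilt = (static_cast<uint32_t>(hi) << 16) | lo;
  assert(rebuilt == static_cast<uint32_t>(NegFrameSize));
  return 0;
}
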
// Add the "machine moves" for the instructions we generated above, but in
@@ -600,22 +571,19 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize));
if (HasFP) {
- unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31;
- Reg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
MMI.addFrameInst(
MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset));
}
if (HasBP) {
- unsigned Reg = isPPC64 ? PPC::X30 : PPC::R30;
- Reg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
MMI.addFrameInst(
MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset));
}
if (MustSaveLR) {
- unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR;
- Reg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
MMI.addFrameInst(
MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset));
}
@@ -625,15 +593,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
- if (!isPPC64) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
- .addReg(PPC::R1)
- .addReg(PPC::R1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
- .addReg(PPC::X1)
- .addReg(PPC::X1);
- }
+ BuildMI(MBB, MBBI, dl, OrInst, FPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
if (needsFrameMoves) {
ReadyLabel = MMI.getContext().CreateTempSymbol();
@@ -641,9 +603,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Mark effective beginning of when frame pointer is ready.
BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
- unsigned Reg = HasFP ? (isPPC64 ? PPC::X31 : PPC::R31)
- : (isPPC64 ? PPC::X1 : PPC::R1);
- Reg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg));
}
}
@@ -664,19 +624,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
- if (Subtarget.isSVR4ABI()
- && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
- && MustSaveCRs.empty())
- continue;
+ if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
+ && MustSaveCRs.empty())
+ continue;
// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
- if (Subtarget.isSVR4ABI()
- && Subtarget.isPPC64()
- && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
MMI.addFrameInst(MCCFIInstruction::createOffset(
Label, MRI->getDwarfRegNum(PPC::CR2, true), 8));
- continue;
+ continue;
}
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
@@ -707,7 +664,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
RetOpcode == PPC::TCRETURNai8) &&
"Can only insert epilog into returning blocks");
- // Get alignment info so we know how to restore r1
+ // Get alignment info so we know how to restore the SP.
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes allocated from the FrameInfo.
@@ -715,21 +672,41 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
- // Get operating system
+ // Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
- // Do we have a frame pointer for this function?
+ // Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
+ : PPC::MTLR );
+ const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
+ : PPC::LWZ );
+ const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
+ : PPC::ADDI );
+ const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
+ : PPC::ADD4 );
+
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
assert(FPIndex && "No Frame Pointer Save Slot!");
@@ -741,7 +718,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
int BPOffset = 0;
if (HasBP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
@@ -773,106 +750,76 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
FrameSize += StackAdj;
}
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple LD/LWZ immediate offset operand.
+ bool isLargeFrame = !isInt<16>(FrameSize);
+
if (FrameSize) {
- // The loaded (or persistent) stack pointer value is offset by the 'stwu'
- // on entry to the function. Add this offset back now.
- if (!isPPC64) {
- // If this function contained a fastcc call and GuaranteedTailCallOpt is
- // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
- // call which invalidates the stack pointer value in SP(0). So we use the
- // value of R31 in this case.
- if (FI->hasFastCall() && isInt<16>(FrameSize)) {
- assert(hasFP(MF) && "Expecting a valid the frame pointer.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R31).addImm(FrameSize);
- } else if(FI->hasFastCall()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
- .addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(FrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
- .addReg(PPC::R1)
- .addReg(PPC::R31)
- .addReg(PPC::R0);
- } else if (isInt<16>(FrameSize) &&
- !HasBP &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R1).addImm(FrameSize);
+ // In the prologue, the loaded (or persistent) stack pointer value is offset
+ // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now.
+
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall()) {
+ assert(HasFP && "Expecting a valid frame pointer.");
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(FPReg).addImm(FrameSize);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
- .addImm(0).addReg(PPC::R1);
- }
- } else {
- if (FI->hasFastCall() && isInt<16>(FrameSize)) {
- assert(hasFP(MF) && "Expecting a valid the frame pointer.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
- .addReg(PPC::X31).addImm(FrameSize);
- } else if(FI->hasFastCall()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
.addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
.addImm(FrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
- .addReg(PPC::X1)
- .addReg(PPC::X31)
- .addReg(PPC::X0);
- } else if (isInt<16>(FrameSize) && !HasBP &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
- .addReg(PPC::X1).addImm(FrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
- .addImm(0).addReg(PPC::X1);
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
+ .addReg(FPReg)
+ .addReg(ScratchReg);
}
+ } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg)
+ .addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadInst, SPReg)
+ .addImm(0)
+ .addReg(SPReg);
}
- }
- if (isPPC64) {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(LROffset).addReg(PPC::X1);
-
- if (!MustSaveCRs.empty())
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12)
- .addImm(8).addReg(PPC::X1);
+ }
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
- .addImm(FPOffset).addReg(PPC::X1);
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
- if (HasBP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X30)
- .addImm(BPOffset).addReg(PPC::X1);
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Epilogue CR restoring supported only in 64-bit mode");
- if (!MustSaveCRs.empty())
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
- .addReg(PPC::X12, getKillRegState(i == e-1));
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
+ .addImm(8)
+ .addReg(SPReg);
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
- } else {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
- .addImm(LROffset).addReg(PPC::R1);
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
- assert(MustSaveCRs.empty() &&
- "Epilogue CR restoring supported only in 64-bit mode");
+ if (HasBP)
+ BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
- .addImm(FPOffset).addReg(PPC::R1);
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ .addReg(TempReg, getKillRegState(i == e-1));
- if (HasBP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R30)
- .addImm(FPOffset).addReg(PPC::R1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
- }
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
@@ -880,27 +827,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
- unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
- BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg).addImm(CallerAllocatedAmt);
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg).addImm(CallerAllocatedAmt);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
.addImm(CallerAllocatedAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
.addImm(CallerAllocatedAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
- .addReg(StackReg)
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
.addReg(FPReg)
- .addReg(TmpReg);
+ .addReg(ScratchReg);
}
} else if (RetOpcode == PPC::TCRETURNdi) {
MBBI = MBB.getLastNonDebugInstr();
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 475bde1..6ba6af6 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -876,8 +876,10 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// target-specific node if it hasn't already been changed.
SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
switch (N->getOpcode()) {
default: break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 664dd12..8da5f05 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -149,28 +149,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFCPSGN()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ } else {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ }
if (Subtarget->hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-
- // frin does not implement "ties to even." Thus, this is safe only in
- // fast-math mode.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-
- // These need to set FE_INEXACT, and use a custom inserter.
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- }
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, CTPOP or CTTZ
@@ -560,7 +556,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setInsertFencesForAtomic(true);
- setSchedulingPreference(Sched::Hybrid);
+ if (Subtarget->enableMachineScheduler())
+ setSchedulingPreference(Sched::Source);
+ else
+ setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
@@ -579,24 +578,47 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
}
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+ unsigned MaxMaxAlign) {
+ if (MaxAlign == MaxMaxAlign)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+ MaxAlign = 32;
+ else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == MaxMaxAlign)
+ break;
+ }
+ }
+}
+
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
- const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ if (PPCSubTarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- if (VTy->getBitWidth() >= 128)
- return 16;
-
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- if (PPCSubTarget.isPPC64())
- return 8;
-
- return 4;
+ unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+ if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+ getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ return Align;
}
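
[Editor's sketch] getMaxByValAlign walks vector, array and struct types recursively; the net effect for the common cases can be summarized with a much smaller stand-alone sketch (hypothetical helper, ignoring QPX and nested aggregates, not part of the patch):

#include <algorithm>
#include <cassert>

// Simplified restatement of the rule getByValTypeAlignment/getMaxByValAlign
// encode: a byval aggregate is aligned to the widest vector it contains,
// capped at 16 bytes with Altivec (32 with QPX); otherwise it keeps the
// pointer-size default.
static unsigned byValAlign(unsigned widestVectorBits, bool hasAltivec,
                           bool isPPC64) {
  unsigned Align = isPPC64 ? 8 : 4;
  if (hasAltivec && widestVectorBits >= 128)
    Align = std::max(Align, 16u);
  return Align;
}

int main() {
  assert(byValAlign(128, true, true) == 16);   // struct containing a v4f32
  assert(byValAlign(0, true, true) == 8);      // scalar-only struct on ppc64
  assert(byValAlign(128, false, false) == 4);  // no Altivec: 4-byte default
  return 0;
}
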
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -1386,6 +1408,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
+ // FIXME: TLS addresses currently use medium model code sequences,
+ // which is the most useful form. Eventually support for small and
+ // large models could be added if users need it, at the cost of
+ // additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
@@ -1814,6 +1840,12 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+ return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
@@ -2276,6 +2308,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(FIN);
continue;
}
+
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ CurArgOffset = ArgOffset;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (ObjSize < PtrByteSize)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
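
[Editor's sketch] The ArgOffset adjustment added above for over-aligned byval arguments is the usual round-up-to-a-multiple computation. A quick stand-alone check of the arithmetic (hypothetical values, not part of the patch):

#include <cassert>

// Round Offset up to the next multiple of Align: the ((x + a - 1) / a) * a idiom.
static unsigned roundUp(unsigned Offset, unsigned Align) {
  return ((Offset + Align - 1) / Align) * Align;
}

int main() {
  assert(roundUp(48, 16) == 48);  // already aligned: unchanged
  assert(roundUp(52, 16) == 64);  // bumped to the next 16-byte boundary
  assert(roundUp(56, 32) == 64);  // 32-byte-aligned byval aggregate
  return 0;
}
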
@@ -3448,7 +3487,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) &&
+ (!isLocalCall(Callee) ||
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
@@ -3865,6 +3906,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -3956,7 +4006,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
@@ -3979,7 +4029,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
@@ -4287,7 +4337,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
@@ -4752,7 +4802,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
SDValue Tmp;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
@@ -6676,51 +6726,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
- } else if (MI->getOpcode() == PPC::FRINDrint ||
- MI->getOpcode() == PPC::FRINSrint) {
- bool isf32 = MI->getOpcode() == PPC::FRINSrint;
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
- DebugLoc dl = MI->getDebugLoc();
-
- MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
-
- // Perform the rounding.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
- .addReg(Src);
-
- // Compare the results.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
- .addReg(Dest).addReg(Src);
-
- // If the results were not equal, then set the FPSCR XX bit.
- MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, midMBB);
- F->insert(It, exitMBB);
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
-
- BB->addSuccessor(midMBB);
- BB->addSuccessor(exitMBB);
-
- BB = midMBB;
-
- // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
- // the FI bit here because that will not automatically set XX also,
- // and XX is what libm interprets as the FE_INEXACT flag.
- BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
- BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
-
- BB->addSuccessor(exitMBB);
-
- BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -7061,8 +7066,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0)
+ if (RV.getNode() != 0) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
+
+ EVT VT = RV.getValueType();
+
+ SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+ Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+ }
+
+ SDValue ZeroCmp =
+ DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ DCI.AddToWorklist(ZeroCmp.getNode());
+ DCI.AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+ ZeroCmp, Zero, RV);
return RV;
+ }
}
}
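
[Editor's sketch] The NaN the comment above refers to comes from the Newton-Raphson refinement itself: once the reciprocal-square-root estimate is +inf (input exactly 0), the refinement multiplies inf by 0. A small floating-point demonstration of the failure mode (hypothetical scalar code in place of the DAG nodes, not part of the patch):

#include <cmath>
#include <cstdio>

// One Newton-Raphson refinement step for a reciprocal estimate e of 1/d.
static double refineRecip(double d, double e) { return e * (2.0 - d * e); }

int main() {
  double x = 0.0;
  double rsqrt = 1.0 / std::sqrt(x);        // +inf when x == 0
  double est = 1.0 / rsqrt;                 // 0: initial estimate of 1/rsqrt
  double sqrtEst = refineRecip(rsqrt, est); // 0 * (2 - inf*0) -> NaN
  std::printf("%f\n", sqrtEst);             // prints nan, not sqrt(0) == 0,
                                            // hence the explicit select to 0
  return 0;
}
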
@@ -7158,7 +7183,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
- DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
@@ -7302,6 +7328,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+
+ break;
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -7645,7 +7673,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
PPC::GPRCRegClass.contains(R.first)) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
- PPC::sub_32, &PPC::GPRCRegClass),
+ PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
}
@@ -7896,7 +7924,7 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref)
+ if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index aa5e821..df3af35 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -627,6 +627,8 @@ namespace llvm {
SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+
+ CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
namespace PPC {
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index f78bb38..46db4fe 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -270,6 +270,7 @@ def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
@@ -506,6 +507,14 @@ defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
[(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
} // Interpretation64Bit
+// For fast-isel:
+let isCodeGenOnly = 1 in {
+def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsb $rA, $rS", IntSimple, []>, isPPC64;
+def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsh $rA, $rS", IntSimple, []>, isPPC64;
+} // isCodeGenOnly for fast-isel
+
defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
"extsw", "$rA, $rS", IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
@@ -520,16 +529,16 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
"cntlzd", "$rA, $rS", IntGeneral,
[(set i64:$rA, (ctlz i64:$rS))]>;
-defm POPCNTD : XForm_11r<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
- "popcntd", "$rA, $rS", IntGeneral,
- [(set i64:$rA, (ctpop i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
+ "popcntd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32-bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
-defm POPCNTW : XForm_11r<31, 378, (outs gprc:$rA), (ins gprc:$rS),
- "popcntw", "$rA, $rS", IntGeneral,
- [(set i32:$rA, (ctpop i32:$rS))]>;
+def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
+ "popcntw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divd", "$rT, $rA, $rB", IntDivD,
@@ -569,6 +578,14 @@ defm RLDICL : MDForm_1r<30, 0,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
[]>, isPPC64;
+// For fast-isel:
+let isCodeGenOnly = 1 in
+def RLDICL_32_64 : MDForm_1<30, 0,
+ (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64;
+// End fast-isel.
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
@@ -620,6 +637,15 @@ def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
"lwax $rD, $src", LdStLHA,
[(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
+// For fast-isel:
+let isCodeGenOnly = 1, mayLoad = 1 in {
+def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+} // end fast-isel isCodeGenOnly
// Update forms.
let mayLoad = 1, neverHasSideEffects = 1 in {
@@ -942,6 +968,9 @@ let PPC970_Unit = 3, neverHasSideEffects = 1,
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
"fcfid", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctid", "$frD, $frB", FPGeneral,
+ []>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index fdea51d..a55abe3 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -229,35 +229,45 @@ let Predicates = [HasAltivec] in {
let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", LdStLoad /*FIXME*/, []>;
+ "dss $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSSALL : DSS_Form<822, (outs),
(ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", LdStLoad /*FIXME*/, []>;
+ "dssall", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DST : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTT : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTST : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTSTT : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DST64 : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTT64 : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTST64 : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
}
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 42adc02..29233d4 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -398,6 +398,13 @@ class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let RST = 0;
}
+class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let B = 0;
+}
+
class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -438,6 +445,17 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<1> L;
+
+ let Inst{6-10} = RS;
+ let Inst{15} = L;
+ let Inst{21-30} = xo;
+}
+
class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -534,6 +552,21 @@ class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = RC;
}
+class XForm_0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+}
+
// DCB_Form - Form X instruction, used for dcb* instructions.
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 375daee..315ad04 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,7 +33,7 @@
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRMAP_INFO
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
using namespace llvm;
@@ -45,6 +45,9 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);
+// Pin the vtable to this file.
+void PPCInstrInfo::anchor() {}
+
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
TM(tm), RI(*TM.getSubtargetImpl()) {}
@@ -985,6 +988,10 @@ bool PPCInstrInfo::SubsumesPredicate(
if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
return false;
+ // P1 can only subsume P2 if they test the same condition register.
+ if (Pred1[1].getReg() != Pred2[1].getReg())
+ return false;
+
PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index bd72a4d..f140c41 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -78,6 +78,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs,
bool &NonRI, bool &SpillsVRS) const;
+ virtual void anchor();
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 398a11b..2bd3aad 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -785,6 +785,20 @@ multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
}
+multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -1678,6 +1692,9 @@ let isCompare = 1, neverHasSideEffects = 1 in {
let Uses = [RM] in {
let neverHasSideEffects = 1 in {
+ defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiw", "$frD, $frB", FPGeneral,
+ []>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwz", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
@@ -1686,23 +1703,13 @@ let Uses = [RM] in {
"frsp", "$frD, $frB", FPGeneral,
[(set f32:$frD, (fround f64:$frB))]>;
- // The frin -> nearbyint mapping is valid only in fast-math mode.
let Interpretation64Bit = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", FPGeneral,
- [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ [(set f64:$frD, (frnd f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", FPGeneral,
- [(set f32:$frD, (fnearbyint f32:$frB))]>;
- }
-
- // These pseudos expand to rint but also set FE_INEXACT when the result does
- // not equal the argument.
- let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
- def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB),
- "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
- def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB),
- "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+ [(set f32:$frD, (frnd f32:$frB))]>;
}
let neverHasSideEffects = 1 in {
@@ -1772,6 +1779,14 @@ defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
"fneg", "$frD, $frB", FPGeneral,
[(set f64:$frD, (fneg f64:$frB))]>;
+defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
+let Interpretation64Bit = 1 in
+defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
+
// Reciprocal estimates.
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
"fre", "$frD, $frB", FPGeneral,
@@ -1855,7 +1870,7 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
"mtspr $SPR, $RT", SprMTSPR>;
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
- "mftb $RT, $SPR", SprMFTB>;
+ "mftb $RT, $SPR", SprMFTB>, Deprecated<DeprecatedMFTB>;
let Uses = [CTR] in {
def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
@@ -1927,6 +1942,7 @@ def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
@@ -2280,6 +2296,12 @@ def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
(FNMSUBS $A, $C, $B)>;
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign f64:$frB, f32:$frA),
+ (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>;
+def : Pat<(fcopysign f32:$frB, f64:$frA),
+ (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
+
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
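The comment above notes that FCOPYSIGN's operands need not share a type; the COPY_TO_REGCLASS in the patterns first moves the sign operand into the result's register class. As a hedged, stand-alone illustration of the semantics (hypothetical source, not part of the patch), the operation behaves like std::copysign after promoting the sign operand:

    #include <cmath>
    #include <cstdio>

    int main() {
      double Mag = 2.5;   // magnitude operand ($frB in the patterns above)
      float  Sgn = -1.0f; // sign operand ($frA), deliberately a narrower type
      // Promote the f32 sign source to f64, then copy its sign bit onto Mag;
      // this mirrors (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB).
      double Res = std::copysign(Mag, static_cast<double>(Sgn));
      std::printf("%f\n", Res); // prints -2.500000
      return 0;
    }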
@@ -2300,6 +2322,35 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
"wait $L", LdStLoad, []>;
+def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsr $RS, $L", SprMTMSR>;
+
+def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
+ "mfmsr $RT", SprMFMSR, []>;
+
+def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsrd $RS, $L", SprMTMSRD>;
+
+def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
+ "slbie $RB", SprSLBIE, []>;
+
+def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
+ "slbmte $RS, $RB", SprSLBMTE, []>;
+
+def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
+ "slbmfee $RT, $RB", SprSLBMFEE, []>;
+
+def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>;
+
+def TLBSYNC : XForm_0<31, 566, (outs), (ins),
+ "tlbsync", SprTLBSYNC, []>;
+
+def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
+ "tlbiel $RB", SprTLBIEL, []>;
+
+def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
+ "tlbie $RB,$RS", SprTLBIE, []>;
+
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
//
@@ -2368,6 +2419,46 @@ def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
+def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
+
+def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
+def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
+def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
+def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
+
+def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
+def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
+def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
+def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
+
+def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
+def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
+def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
+def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
+
+def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
+def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
+def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
+def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
+
+def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
+
+def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>;
+def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>;
+
+def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
+
+def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>;
+def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>;
+
+def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>;
+def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>;
+def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>;
+def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
+
+def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index d69aa4a..f61c8bf 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -69,7 +69,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
Name.erase(Name.end()-5, Name.end());
@@ -95,7 +95,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
if (StubSym.getPointer() == 0) {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym = MachineModuleInfoImpl::
- StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index adba613..19ccbfc 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -69,6 +69,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+ ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32;
// 64-bit
ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
@@ -532,6 +533,7 @@ static bool usesIXAddr(const MachineInstr &MI) {
default:
return false;
case PPC::LWA:
+ case PPC::LWA_32:
case PPC::LD:
case PPC::STD:
return true;
@@ -689,14 +691,6 @@ unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
-unsigned PPCRegisterInfo::getEHExceptionRegister() const {
- return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
-}
-
-unsigned PPCRegisterInfo::getEHHandlerRegister() const {
- return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
-}
-
unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
if (!hasBasePointer(MF))
return getFrameRegister(MF);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index d02af9e..dd3bb40 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -97,10 +97,6 @@ public:
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 660c0c3..92ba69c 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -108,6 +108,14 @@ def VecPerm : InstrItinClass;
def VecFPRound : InstrItinClass;
def VecVSL : InstrItinClass;
def VecVSR : InstrItinClass;
+def SprMTMSRD : InstrItinClass;
+def SprSLIE : InstrItinClass;
+def SprSLBIE : InstrItinClass;
+def SprSLBMTE : InstrItinClass;
+def SprSLBMFEE : InstrItinClass;
+def SprSLBIA : InstrItinClass;
+def SprTLBIEL : InstrItinClass;
+def SprTLBIE : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 8d5838e..1612cd2 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -14,39 +14,8 @@
//===----------------------------------------------------------------------===//
// Functional units on the PowerPC A2 chip sets
//
-def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1
-def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2
-def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3
-def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4
-def IU4_0 : FuncUnit; // Instruction buffer slot 1
-def IU4_1 : FuncUnit; // Instruction buffer slot 2
-def IU4_2 : FuncUnit; // Instruction buffer slot 3
-def IU4_3 : FuncUnit; // Instruction buffer slot 4
-def IU4_4 : FuncUnit; // Instruction buffer slot 5
-def IU4_5 : FuncUnit; // Instruction buffer slot 6
-def IU4_6 : FuncUnit; // Instruction buffer slot 7
-def IU4_7 : FuncUnit; // Instruction buffer slot 8
-def IU5 : FuncUnit; // Dependency resolution
-def IU6 : FuncUnit; // Instruction issue
-def RF0 : FuncUnit;
-def XRF1 : FuncUnit;
-def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline
-def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline
-def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline
-def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline
-def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline
-def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline
-def FRF1 : FuncUnit;
-def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline
-def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline
-def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline
-def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline
-def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline
-def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline
-
-def CR_Bypass : Bypass; // The bypass for condition regs.
-//def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
-//def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+def XU : FuncUnit; // XU pipeline
+def FU : FuncUnit; // FP pipeline
//
// This file defines the itinerary class data for the PPC A2 processor.
@@ -55,699 +24,119 @@ def CR_Bypass : Bypass; // The bypass for condition regs.
def PPCA2Itineraries : ProcessorItineraries<
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3,
- IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7,
- IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6,
- FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6],
- [CR_Bypass, GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>],
- [53, 7, 7],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateDI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStStore , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStICBI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<LdStLFD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHA , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLMW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTWCX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>,
- InstrItinData<SprISYNC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
- InstrItinData<SprMFSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [GPR_Bypass, NoBypass]>,
- InstrItinData<SprMTMSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
- InstrItinData<SprMFCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7],
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [GPR_Bypass, NoBypass]>,
- InstrItinData<SprMFSPR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSRIN , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprRFI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprSC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [13, 7, 7],
- [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>,
- InstrStage<71, [FEX1], 0>,
- InstrStage<71, [FEX2], 0>,
- InstrStage<71, [FEX3], 0>,
- InstrStage<71, [FEX4], 0>,
- InstrStage<71, [FEX5], 0>,
- InstrStage<71, [FEX6]>],
- [86, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>,
- InstrStage<58, [FEX1], 0>,
- InstrStage<58, [FEX2], 0>,
- InstrStage<58, [FEX3], 0>,
- InstrStage<58, [FEX4], 0>,
- InstrStage<58, [FEX5], 0>,
- InstrStage<58, [FEX6]>],
- [73, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPSqrt , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>,
- InstrStage<68, [FEX1], 0>,
- InstrStage<68, [FEX2], 0>,
- InstrStage<68, [FEX3], 0>,
- InstrStage<68, [FEX4], 0>,
- InstrStage<68, [FEX5], 0>,
- InstrStage<68, [FEX6]>],
- [86, 7], // FIXME: should be [86, 7] for double
- // and [82, 7] for single. Likewise,
- // the FEX? cycle count should be 68
- // for double and 64 for single.
- [NoBypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7],
- [FPR_Bypass, FPR_Bypass]>
+ [XU, FU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntCompare , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntDivW , [InstrStage<1, [XU]>],
+ [39, 1, 1]>,
+ InstrItinData<IntDivD , [InstrStage<1, [XU]>],
+ [71, 1, 1]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<IntRotate , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntRotateD , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntRotateDI , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntShift , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [XU]>],
+ [2, 1]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [XU]>],
+ [2, 1]>,
+ InstrItinData<BrB , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<BrCR , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<BrMCR , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [XU]>],
+ [6, 8, 1, 1]>,
+ InstrItinData<LdStLDU , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStStore , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<LdStICBI, [InstrStage<1, [XU]>],
+ [16, 1, 1]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<LdStLFD , [InstrStage<1, [XU]>],
+ [7, 1, 1]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [XU]>],
+ [7, 9, 1, 1]>,
+ InstrItinData<LdStLHA , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [XU]>],
+ [6, 8, 1, 1]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSTD , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSTWCX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSync , [InstrStage<1, [XU]>],
+ [6]>,
+ InstrItinData<SprISYNC , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [XU]>],
+ [16, 1]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [XU]>],
+ [4, 1]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [XU]>],
+ [4, 1]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprRFI , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<SprSC , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FU]>],
+ [6, 1, 1]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FU]>],
+ [6, 1, 1]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FU]>],
+ [5, 1, 1]>,
+ InstrItinData<FPDivD , [InstrStage<1, [FU]>],
+ [72, 1, 1]>,
+ InstrItinData<FPDivS , [InstrStage<1, [FU]>],
+ [59, 1, 1]>,
+ InstrItinData<FPSqrt , [InstrStage<1, [FU]>],
+ [69, 1, 1]>,
+ InstrItinData<FPFused , [InstrStage<1, [FU]>],
+ [6, 1, 1, 1]>,
+ InstrItinData<FPRes , [InstrStage<1, [FU]>],
+ [6, 1]>
]>;
// ===---------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
index 9bb779a..c189b9e 100644
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -36,6 +36,8 @@ def CFX_0 : FuncUnit; // CFX pipeline
def LSU_0 : FuncUnit; // LSU pipeline
def FPU_0 : FuncUnit; // FPU pipeline
+def CR_Bypass : Bypass;
+
def PPCE500mcItineraries : ProcessorItineraries<
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
[CR_Bypass, GPR_Bypass, FPR_Bypass], [
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
index d7e11ac..7a24d20 100644
--- a/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -39,6 +39,7 @@ def CFX_1 : FuncUnit; // CFX pipeline stage 1
// def LSU_0 : FuncUnit; // LSU pipeline
// def FPU_0 : FuncUnit; // FPU pipeline
+// def CR_Bypass : Bypass;
def PPCE5500Itineraries : ProcessorItineraries<
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 12d0326..7231ab1 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -15,6 +15,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Function.h"
@@ -74,6 +75,7 @@ void PPCSubtarget::initializeEnvironment() {
Use64BitRegs = false;
HasAltivec = false;
HasQPX = false;
+ HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
HasFRES = false;
@@ -88,6 +90,8 @@ void PPCSubtarget::initializeEnvironment() {
HasPOPCNTD = false;
HasLDBRX = false;
IsBookE = false;
+ DeprecatedMFTB = false;
+ DeprecatedDST = false;
HasLazyResolverStubs = false;
IsJITCodeModel = false;
}
@@ -163,14 +167,7 @@ bool PPCSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here,
- // but we can't because we can't reassign the cr registers. There is a
- // dependence between the cr register and the RLWINM instruction used
- // to extract its value which the anti-dependency breaker can't currently
- // see. Maybe we should make a late-expanded pseudo to encode this dependency.
- // (the relevant code is in PPCDAGToDAGISel::SelectSETCC)
-
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ Mode = TargetSubtargetInfo::ANTIDEP_ALL;
CriticalPathRCs.clear();
@@ -179,9 +176,44 @@ bool PPCSubtarget::enablePostRAScheduler(
else
CriticalPathRCs.push_back(&PPC::GPRCRegClass);
- CriticalPathRCs.push_back(&PPC::F8RCRegClass);
- CriticalPathRCs.push_back(&PPC::VRRCRegClass);
-
return OptLevel >= CodeGenOpt::Default;
}
+// Embedded cores need aggressive scheduling.
+static bool needsAggressiveScheduling(unsigned Directive) {
+ switch (Directive) {
+ default: return false;
+ case PPC::DIR_440:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ return true;
+ }
+}
+
+bool PPCSubtarget::enableMachineScheduler() const {
+ // Enable MI scheduling for the embedded cores.
+ // FIXME: Enable this for all cores (some additional modeling
+ // may be necessary).
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
+void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ if (needsAggressiveScheduling(DarwinDirective)) {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+
+ // Spilling is generally expensive on all PPC cores, so always enable
+ // register-pressure tracking.
+ Policy.ShouldTrackPressure = true;
+}
+
+bool PPCSubtarget::useAA() const {
+ // Use AA during code generation for the embedded cores.
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 3f3fc0e..c863a6e 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -76,6 +76,8 @@ protected:
bool IsPPC64;
bool HasAltivec;
bool HasQPX;
+ bool HasVSX;
+ bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
bool HasRecipPrec;
@@ -87,6 +89,8 @@ protected:
bool HasPOPCNTD;
bool HasLDBRX;
bool IsBookE;
+ bool DeprecatedMFTB;
+ bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
bool IsLittleEndian;
@@ -171,6 +175,7 @@ public:
bool isLittleEndian() const { return IsLittleEndian; }
// Specific obvious features.
+ bool hasFCPSGN() const { return HasFCPSGN; }
bool hasFSQRT() const { return HasFSQRT; }
bool hasFRE() const { return HasFRE; }
bool hasFRES() const { return HasFRES; }
@@ -188,6 +193,8 @@ public:
bool hasPOPCNTD() const { return HasPOPCNTD; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
+ bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
+ bool isDeprecatedDST() const { return DeprecatedDST; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -205,6 +212,14 @@ public:
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
+
+ // Scheduling customization.
+ bool enableMachineScheduler() const;
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const;
+ bool useAA() const;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
new file mode 100644
index 0000000..e876be1
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -0,0 +1,23 @@
+//===-- PPCTargetStreamer.h - PPC Target Streamer ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETSTREAMER_H
+#define PPCTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class PPCTargetStreamer : public MCTargetStreamer {
+public:
+ virtual ~PPCTargetStreamer();
+ virtual void emitTCEntry(const MCSymbol &S) = 0;
+};
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2504ba7..8879630 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -77,6 +77,7 @@ public:
/// \name Scalar TTI Implementations
/// @{
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+ virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
/// @}
@@ -129,6 +130,14 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
return PSK_Software;
}
+void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
+ if (ST->getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 is in-order with a deep pipeline, and concatenation unrolling
+ // helps expose latency-hiding opportunities to the instruction scheduler.
+ UP.Partial = UP.Runtime = true;
+ }
+}
+
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec())
return 0;
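The new unrolling preference enables partial and runtime unrolling on the A2, i.e. concatenating several copies of the loop body so the in-order, deep pipeline has independent work to overlap. A minimal sketch of the transformation (hypothetical source, not taken from the patch):

    // Original loop:  for (unsigned i = 0; i < N; ++i) X[i] += A * Y[i];
    // After unroll-by-2 with a runtime remainder:
    void saxpy_unrolled(float *X, const float *Y, float A, unsigned N) {
      unsigned i = 0;
      for (; i + 1 < N; i += 2) { // concatenated copies of the body (UP.Partial)
        X[i]     += A * Y[i];
        X[i + 1] += A * Y[i + 1];
      }
      for (; i < N; ++i)          // remainder for odd trip counts (UP.Runtime)
        X[i] += A * Y[i];
    }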
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 6b374cb..025b28e 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -29,11 +29,13 @@ FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
FunctionPass *createR600TextureIntrinsicsReplacer();
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
FunctionPass *createR600Packetizer(TargetMachine &tm);
FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);
// SI Passes
+FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
@@ -43,7 +45,6 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 0048e25..182235b 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -21,8 +21,18 @@ def FeatureDumpCode : SubtargetFeature <"DumpCode",
"true",
"Dump MachineInstrs in the CodeEmitter">;
+def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
+ "EnableIRStructurizer",
+ "false",
+ "Disable IR Structurizer">;
+
// Target features
+def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
+ "EnableIfCvt",
+ "false",
+ "Disable the if conversion pass">;
+
def FeatureFP64 : SubtargetFeature<"fp64",
"FP64",
"true",
@@ -82,6 +92,8 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[Feature64BitPtr, FeatureFP64]>;
+def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
+ [Feature64BitPtr, FeatureFP64]>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index e039b77..67bdba2 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -45,32 +45,60 @@ extern "C" void LLVMInitializeR600AsmPrinter() {
TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
}
+AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer)
+{
+ DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode() &&
+ ! Streamer.hasRawTextSupport();
+}
+
/// We need to override this function so we can avoid
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
- if (STM.dumpCode()) {
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- MF.dump();
-#endif
- }
SetupMachineFunction(MF);
if (OutStreamer.hasRawTextSupport()) {
OutStreamer.EmitRawText("@" + MF.getName() + ":");
}
- const MCSectionELF *ConfigSection = getObjFileLowering().getContext()
- .getELFSection(".AMDGPU.config",
+ MCContext &Context = getObjFileLowering().getContext();
+ const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
ELF::SHT_PROGBITS, 0,
SectionKind::getReadOnly());
OutStreamer.SwitchSection(ConfigSection);
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
EmitProgramInfoSI(MF);
} else {
EmitProgramInfoR600(MF);
}
+
+ DisasmLines.clear();
+ HexLines.clear();
+ DisasmLineMaxLen = 0;
+
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
+
+ if (STM.dumpCode()) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ MF.dump();
+#endif
+
+ if (DisasmEnabled) {
+ OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
+ ELF::SHT_NOTE, 0,
+ SectionKind::getReadOnly()));
+
+ for (size_t i = 0; i < DisasmLines.size(); ++i) {
+ std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
+ Comment += " ; " + HexLines[i] + "\n";
+
+ OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
+ OutStreamer.EmitBytes(StringRef(Comment));
+ }
+ }
+ }
+
return false;
}
@@ -139,6 +167,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
}
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
@@ -154,7 +183,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- MachineOperand & MO = MI.getOperand(op_idx);
+ MachineOperand &MO = MI.getOperand(op_idx);
unsigned maxUsed;
unsigned width = 0;
bool isSGPR = false;
@@ -168,8 +197,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
VCCUsed = true;
continue;
}
+
switch (reg) {
default: break;
+ case AMDGPU::SCC:
case AMDGPU::EXEC:
case AMDGPU::M0:
continue;
@@ -202,6 +233,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
} else if (AMDGPU::VReg_256RegClass.contains(reg)) {
isSGPR = false;
width = 8;
+ } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 16;
} else if (AMDGPU::VReg_512RegClass.contains(reg)) {
isSGPR = false;
width = 16;
@@ -234,13 +268,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
OutStreamer.EmitIntValue(RsrcReg, 4);
OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
+ unsigned LDSAlignShift;
+ if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ // LDS is allocated in 64 dword blocks
+ LDSAlignShift = 8;
+ } else {
+ // LDS is allocated in 128 dword blocks
+ LDSAlignShift = 9;
+ }
+ unsigned LDSBlocks =
+ RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+
if (MFI->ShaderType == ShaderType::COMPUTE) {
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
+ OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
}
if (MFI->ShaderType == ShaderType::PIXEL) {
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
+ OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
}
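The LDS size is now rounded up to whole allocation blocks whose size depends on the generation: with LDSAlignShift = 8 a block is 256 bytes (64 dwords), with 9 it is 512 bytes (128 dwords). A small worked example of the computation, using a hypothetical 1000-byte LDS footprint:

    #include <cstdio>

    // Mirrors RoundUpToAlignment(LDSSize, 1 << Shift) >> Shift from the code above.
    static unsigned ldsBlocks(unsigned LDSSize, unsigned LDSAlignShift) {
      unsigned BlockSize = 1u << LDSAlignShift;
      return (LDSSize + BlockSize - 1) / BlockSize;
    }

    int main() {
      std::printf("%u\n", ldsBlocks(1000, 8)); // 4 blocks of 256 bytes (pre-Sea Islands)
      std::printf("%u\n", ldsBlocks(1000, 9)); // 2 blocks of 512 bytes (Sea Islands and later)
      return 0;
    }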
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index f425ef4..05dc9bb 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,14 +16,15 @@
#define AMDGPU_ASMPRINTER_H
#include "llvm/CodeGen/AsmPrinter.h"
+#include <string>
+#include <vector>
namespace llvm {
class AMDGPUAsmPrinter : public AsmPrinter {
public:
- explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) { }
+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -38,6 +39,11 @@ public:
/// Implemented in AMDGPUMCInstLower.cpp
virtual void EmitInstruction(const MachineInstr *MI);
+
+protected:
+ bool DisasmEnabled;
+ std::vector<std::string> DisasmLines, HexLines;
+ size_t DisasmLineMaxLen;
};
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
index fc95d58..65cdb24 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -19,12 +19,13 @@ def CC_SI : CallingConv<[
CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
- SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
+ SGPR16
]>>>,
CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
[ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR12, SGPR15 ]
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
>>>,
CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
@@ -32,21 +33,33 @@ def CC_SI : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
- ]>>>
+ ]>>>,
+
+ CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ >>>
]>;
+// Calling convention for R600
+def CC_R600 : CallingConv<[
+ CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
+ T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
+ T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
+ T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
+ T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
+ T30_XYZW, T31_XYZW, T32_XYZW
+ ]>>>
+]>;
+
// Calling convention for compute kernels
def CC_AMDGPU_Kernel : CallingConv<[
- CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>,
- CCIfType<[i64, f64, v2f32, v2i32], CCAssignToStack < 8, 8>>,
- CCIfType<[i32, f32], CCAssignToStack < 4, 4>>,
- CCIfType<[i16], CCAssignToStack < 2, 4>>,
- CCIfType<[i8], CCAssignToStack < 1, 4>>
+ CCCustom<"allocateStack">
]>;
def CC_AMDGPU : CallingConv<[
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() == "
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() >= "
"AMDGPUSubtarget::SOUTHERN_ISLANDS && "
"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
@@ -55,5 +68,7 @@ def CC_AMDGPU : CallingConv<[
"State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
- ".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
+ ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>,
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
+ ".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_R600>>
]>;
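
CC_AMDGPU_Kernel now routes every argument through the custom allocateStack hook (defined later in this patch), which hands each value the next stack offset sized to the type and aligned to the argument's original alignment. A minimal model of that sequential slot assignment, where StackAllocator is a placeholder standing in for CCState and the alignments are illustrative only (editorial sketch, not the CCState API):

    #include <cassert>
    #include <cstdint>

    // Toy model of sequential stack-slot assignment: each argument gets the
    // next offset aligned to its alignment, as CCState::AllocateStack does
    // when called from the allocateStack hook.
    struct StackAllocator {
      uint64_t NextOffset = 0;
      uint64_t allocate(uint64_t Size, uint64_t Align) {
        uint64_t Offset = (NextOffset + Align - 1) / Align * Align;
        NextOffset = Offset + Size;
        return Offset;
      }
    };

    int main() {
      StackAllocator A;
      assert(A.allocate(4, 4) == 0);    // i32 argument
      assert(A.allocate(1, 1) == 4);    // i8 argument
      assert(A.allocate(16, 16) == 16); // v4i32 argument, realigned to 16
      return 0;
    }
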
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index f222901..a989135 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -77,6 +77,7 @@ private:
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
+ const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr,
SDValue &BaseReg, SDValue& Offset);
@@ -102,6 +103,37 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
+/// \brief Determine the register class for \p OpNo
+/// \returns The register class of the virtual register that will be used for
+/// the given operand number \p OpNo or NULL if the register class cannot be
+/// determined.
+const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
+ unsigned OpNo) const {
+ if (!N->isMachineOpcode()) {
+ return NULL;
+ }
+ switch (N->getMachineOpcode()) {
+ default: {
+ const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
+ unsigned OpIdx = Desc.getNumDefs() + OpNo;
+ if (OpIdx >= Desc.getNumOperands())
+ return NULL;
+ int RegClass = Desc.OpInfo[OpIdx].RegClass;
+ if (RegClass == -1) {
+ return NULL;
+ }
+ return TM.getRegisterInfo()->getRegClass(RegClass);
+ }
+ case AMDGPU::REG_SEQUENCE: {
+ const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(
+ cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+ unsigned SubRegIdx =
+ dyn_cast<ConstantSDNode>(N->getOperand(OpNo + 1))->getZExtValue();
+ return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
+ }
+ }
+}
+
SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
@@ -161,130 +193,94 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
}
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(TM.getInstrInfo());
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
}
switch (Opc) {
default: break;
- case AMDGPUISD::CONST_ADDRESS: {
- for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
- I != SDNode::use_end(); I = Next) {
- Next = llvm::next(I);
- if (!I->isMachineOpcode()) {
- continue;
- }
- unsigned Opcode = I->getMachineOpcode();
- bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
- int SrcIdx = I.getOperandNo();
- int SelIdx;
- // Unlike MachineInstrs, SDNodes do not have results in their operand
- // list, so we need to increment the SrcIdx, since
- // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
- if (HasDst) {
- SrcIdx++;
- }
-
- SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
- if (SelIdx < 0) {
- continue;
- }
-
- SDValue CstOffset;
- if (N->getValueType(0).isVector() ||
- !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
- continue;
-
- // Gather constants values
- int SrcIndices[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
- };
- std::vector<unsigned> Consts;
- for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
- int OtherSrcIdx = SrcIndices[i];
- int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
- if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
+ case ISD::BUILD_VECTOR: {
+ unsigned RegClassID;
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPURegisterInfo *TRI =
+ static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ assert(VT.getVectorElementType().bitsEq(MVT::i32));
+ if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ bool UseVReg = true;
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ U != E; ++U) {
+ if (!U->isMachineOpcode()) {
continue;
}
- if (HasDst) {
- OtherSrcIdx--;
- OtherSelIdx--;
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+ if (!RC) {
+ continue;
}
- if (RegisterSDNode *Reg =
- dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
- if (Reg->getReg() == AMDGPU::ALU_CONST) {
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
- Consts.push_back(Cst->getZExtValue());
- }
+ if (SIRI->isSGPRClass(RC)) {
+ UseVReg = false;
}
}
-
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
- Consts.push_back(Cst->getZExtValue());
- if (!TII->fitsConstReadLimitations(Consts))
- continue;
-
- // Convert back to SDNode indices
- if (HasDst) {
- SrcIdx--;
- SelIdx--;
+ switch(NumVectorElts) {
+ case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
+ AMDGPU::SReg_32RegClassID;
+ break;
+ case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
+ AMDGPU::SReg_64RegClassID;
+ break;
+ case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
+ AMDGPU::SReg_128RegClassID;
+ break;
+ case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
+ AMDGPU::SReg_256RegClassID;
+ break;
+ case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
+ AMDGPU::SReg_512RegClassID;
+ break;
+ default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
- std::vector<SDValue> Ops;
- for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (i == SrcIdx) {
- Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
- } else if (i == SelIdx) {
- Ops.push_back(CstOffset);
- } else {
- Ops.push_back(I->getOperand(i));
- }
+ } else {
+ // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+ // sequence, which adds a 128-bit register copy when going through the
+ // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
+ // possible because they can't be bundled by our scheduler.
+ switch(NumVectorElts) {
+ case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+ case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
+ default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
- CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
- }
- break;
- }
- case ISD::BUILD_VECTOR: {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
- break;
}
- unsigned RegClassID;
- switch(N->getValueType(0).getVectorNumElements()) {
- case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
- case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+
+ if (NumVectorElts == 1) {
+ return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
+ VT.getVectorElementType(),
+ N->getOperand(0), RegClass);
}
- // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
- // that adds a 128 bits reg copy when going through TwoAddressInstructions
- // pass. We want to avoid 128 bits copies as much as possible because they
- // can't be bundled by our scheduler.
- SDValue RegSeqArgs[9] = {
- CurDAG->getTargetConstant(RegClassID, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
- };
+
+ assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+ "supported yet");
+ // 16 = Max Num Vector Elements
+ // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
+ // 1 = Vector Register Class
+ SDValue RegSeqArgs[16 * 2 + 1];
+
+ RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
bool IsRegSeq = true;
for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ // XXX: Why is this here?
if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
break;
}
- RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
}
if (!IsRegSeq)
break;
@@ -313,285 +309,44 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
SDLoc(N), N->getValueType(0), Ops);
}
-
- case ISD::ConstantFP:
- case ISD::Constant: {
+ case AMDGPUISD::REGISTER_LOAD: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- // XXX: Custom immediate lowering not implemented yet. Instead we use
- // pseudo instructions defined in SIInstructions.td
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
- }
-
- uint64_t ImmValue = 0;
- unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
-
- if (N->getOpcode() == ISD::ConstantFP) {
- // XXX: 64-bit Immediates not supported yet
- assert(N->getValueType(0) != MVT::f64);
-
- ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
- APFloat Value = C->getValueAPF();
- float FloatValue = Value.convertToFloat();
- if (FloatValue == 0.0) {
- ImmReg = AMDGPU::ZERO;
- } else if (FloatValue == 0.5) {
- ImmReg = AMDGPU::HALF;
- } else if (FloatValue == 1.0) {
- ImmReg = AMDGPU::ONE;
- } else {
- ImmValue = Value.bitcastToAPInt().getZExtValue();
- }
- } else {
- // XXX: 64-bit Immediates not supported yet
- assert(N->getValueType(0) != MVT::i64);
-
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
- if (C->getZExtValue() == 0) {
- ImmReg = AMDGPU::ZERO;
- } else if (C->getZExtValue() == 1) {
- ImmReg = AMDGPU::ONE_INT;
- } else {
- ImmValue = C->getZExtValue();
- }
- }
-
- for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
- Use != SDNode::use_end(); Use = Next) {
- Next = llvm::next(Use);
- std::vector<SDValue> Ops;
- for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
- Ops.push_back(Use->getOperand(i));
- }
-
- if (!Use->isMachineOpcode()) {
- if (ImmReg == AMDGPU::ALU_LITERAL_X) {
- // We can only use literal constants (e.g. AMDGPU::ZERO,
- // AMDGPU::ONE, etc) in machine opcodes.
- continue;
- }
- } else {
- if (!TII->isALUInstr(Use->getMachineOpcode()) ||
- (TII->get(Use->getMachineOpcode()).TSFlags &
- R600_InstFlag::VECTOR)) {
- continue;
- }
-
- int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
- AMDGPU::OpName::literal);
- if (ImmIdx == -1) {
- continue;
- }
-
- if (TII->getOperandIdx(Use->getMachineOpcode(),
- AMDGPU::OpName::dst) != -1) {
- // subtract one from ImmIdx, because the DST operand is usually index
- // 0 for MachineInstrs, but we have no DST in the Ops vector.
- ImmIdx--;
- }
-
- // Check that we aren't already using an immediate.
- // XXX: It's possible for an instruction to have more than one
- // immediate operand, but this is not supported yet.
- if (ImmReg == AMDGPU::ALU_LITERAL_X) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
- assert(C);
-
- if (C->getZExtValue() != 0) {
- // This instruction is already using an immediate.
- continue;
- }
-
- // Set the immediate value
- Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
- }
- }
- // Set the immediate register
- Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
-
- CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
- }
- break;
- }
- }
- SDNode *Result = SelectCode(N);
-
- // Fold operands of selected node
-
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(TM.getInstrInfo());
- if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
- bool IsModified = false;
- do {
- std::vector<SDValue> Ops;
- for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
- I != E; ++I)
- Ops.push_back(*I);
- IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
- if (IsModified) {
- Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
- }
- } while (IsModified);
-
- }
- if (Result && Result->isMachineOpcode() &&
- !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
- && TII->hasInstrModifiers(Result->getMachineOpcode())) {
- // Fold FNEG/FABS
- // TODO: Isel can generate multiple MachineInst, we need to recursively
- // parse Result
- bool IsModified = false;
- do {
- std::vector<SDValue> Ops;
- for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
- I != E; ++I)
- Ops.push_back(*I);
- IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
- if (IsModified) {
- Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
- }
- } while (IsModified);
-
- // If node has a single use which is CLAMP_R600, folds it
- if (Result->hasOneUse() && Result->isMachineOpcode()) {
- SDNode *PotentialClamp = *Result->use_begin();
- if (PotentialClamp->isMachineOpcode() &&
- PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
- unsigned ClampIdx =
- TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
- std::vector<SDValue> Ops;
- unsigned NumOp = Result->getNumOperands();
- for (unsigned i = 0; i < NumOp; ++i) {
- Ops.push_back(Result->getOperand(i));
- }
- Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
- Result = CurDAG->SelectNodeTo(PotentialClamp,
- Result->getMachineOpcode(), PotentialClamp->getVTList(),
- Ops.data(), NumOp);
- }
- }
- }
+ SDValue Addr, Offset;
+
+ SelectADDRIndirect(N->getOperand(1), Addr, Offset);
+ const SDValue Ops[] = {
+ Addr,
+ Offset,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ N->getOperand(0),
+ };
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
+ CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
+ Ops);
}
-
- return Result;
-}
-
-bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
- SDValue &Abs, const R600InstrInfo *TII) {
- switch (Src.getOpcode()) {
- case ISD::FNEG:
- Src = Src.getOperand(0);
- Neg = CurDAG->getTargetConstant(1, MVT::i32);
- return true;
- case ISD::FABS:
- if (!Abs.getNode())
- return false;
- Src = Src.getOperand(0);
- Abs = CurDAG->getTargetConstant(1, MVT::i32);
- return true;
- case ISD::BITCAST:
- Src = Src.getOperand(0);
- return true;
- default:
- return false;
+ case AMDGPUISD::REGISTER_STORE: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ break;
+ SDValue Addr, Offset;
+ SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+ const SDValue Ops[] = {
+ N->getOperand(1),
+ Addr,
+ Offset,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ N->getOperand(0),
+ };
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
+ CurDAG->getVTList(MVT::Other),
+ Ops);
}
-}
-
-bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
- const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
- int OperandIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
- };
- int SelIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
- };
- int NegIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
- };
- int AbsIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
- -1
- };
-
-
- for (unsigned i = 0; i < 3; i++) {
- if (OperandIdx[i] < 0)
- return false;
- SDValue &Src = Ops[OperandIdx[i] - 1];
- SDValue &Sel = Ops[SelIdx[i] - 1];
- SDValue &Neg = Ops[NegIdx[i] - 1];
- SDValue FakeAbs;
- SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
- if (FoldOperand(Src, Sel, Neg, Abs, TII))
- return true;
}
- return false;
+ return SelectCode(N);
}
-bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
- const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
- int OperandIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
- };
- int SelIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
- };
- int NegIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
- };
- int AbsIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
- };
-
- for (unsigned i = 0; i < 8; i++) {
- if (OperandIdx[i] < 0)
- return false;
- SDValue &Src = Ops[OperandIdx[i] - 1];
- SDValue &Sel = Ops[SelIdx[i] - 1];
- SDValue &Neg = Ops[NegIdx[i] - 1];
- SDValue &Abs = Ops[AbsIdx[i] - 1];
- if (FoldOperand(Src, Sel, Neg, Abs, TII))
- return true;
- }
- return false;
-}
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
if (!ptr) {
@@ -804,26 +559,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
-
- if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return;
- }
-
- // Go over all selected nodes and try to fold them a bit more
const AMDGPUTargetLowering& Lowering =
(*(const AMDGPUTargetLowering*)getTargetLowering());
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
+ bool IsModified = false;
+ do {
+ IsModified = false;
+ // Go over all selected nodes and try to fold them a bit more
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
- SDNode *Node = I;
+ SDNode *Node = I;
- MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
- if (!MachineNode)
- continue;
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ if (!MachineNode)
+ continue;
- SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
- if (ResNode != Node) {
- ReplaceUses(Node, ResNode);
+ SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
+ if (ResNode != Node) {
+ ReplaceUses(Node, ResNode);
+ IsModified = true;
+ }
}
- }
+ CurDAG->RemoveDeadNodes();
+ } while (IsModified);
}
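
In the BUILD_VECTOR selection above, the node becomes a single REG_SEQUENCE whose operand list is the register-class ID followed by one (value, sub-register index) pair per element, which is why RegSeqArgs reserves 16 * 2 + 1 slots. A minimal sketch of that operand layout for a hypothetical 4-element vector, using string placeholders instead of SDValues (editorial illustration, not the DAG API):

    #include <cstdio>

    // Stand-in for an SDValue operand; purely illustrative.
    struct Operand { const char *Desc; };

    int main() {
      const unsigned NumElts = 4;
      // 1 slot for the register-class ID + 2 slots (value, subreg index) per
      // element, with room for up to 16 elements, mirroring RegSeqArgs.
      Operand RegSeqArgs[16 * 2 + 1];
      RegSeqArgs[0] = {"SReg_128RegClassID"};
      const char *Vals[NumElts]    = {"x", "y", "z", "w"};
      const char *SubRegs[NumElts] = {"sub0", "sub1", "sub2", "sub3"};
      for (unsigned i = 0; i < NumElts; ++i) {
        RegSeqArgs[1 + 2 * i]     = {Vals[i]};    // element value
        RegSeqArgs[1 + 2 * i + 1] = {SubRegs[i]}; // its sub-register index
      }
      for (unsigned i = 0; i < 1 + 2 * NumElts; ++i)
        std::printf("op%u: %s\n", i, RegSeqArgs[i].Desc);
      return 0;
    }
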
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index efd2756..c4d75ff 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
@@ -28,6 +29,14 @@
#include "llvm/IR/DataLayout.h"
using namespace llvm;
+static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+ return true;
+}
#include "AMDGPUGenCallingConv.inc"
@@ -49,6 +58,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
// The hardware supports ROTR, but not ROTL
setOperationAction(ISD::ROTL, MVT::i32, Expand);
@@ -64,9 +74,29 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::STORE, MVT::v8f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
+
+ setOperationAction(ISD::STORE, MVT::v16f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
+
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
+ // Custom lowering of vector stores is required for local address space
+ // stores.
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ // XXX: Native v2i32 local address space stores are possible, but not
+ // currently implemented.
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
+ // XXX: This can be changed to Custom, once ExpandVectorStores can
+ // handle 64-bit stores.
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
+
setOperationAction(ISD::LOAD, MVT::f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
@@ -76,15 +106,38 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
+
+ setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
+
setOperationAction(ISD::LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
@@ -93,14 +146,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
- static const int types[] = {
- (int)MVT::v2i32,
- (int)MVT::v4i32
+ static const MVT::SimpleValueType IntTypes[] = {
+ MVT::v2i32, MVT::v4i32
};
- const size_t NumTypes = array_lengthof(types);
+ const size_t NumIntTypes = array_lengthof(IntTypes);
- for (unsigned int x = 0; x < NumTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+ for (unsigned int x = 0; x < NumIntTypes; ++x) {
+ MVT::SimpleValueType VT = IntTypes[x];
//Expand the following operations for the current type by default
setOperationAction(ISD::ADD, VT, Expand);
setOperationAction(ISD::AND, VT, Expand);
@@ -119,6 +171,23 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::XOR, VT, Expand);
}
+
+ static const MVT::SimpleValueType FloatTypes[] = {
+ MVT::v2f32, MVT::v4f32
+ };
+ const size_t NumFloatTypes = array_lengthof(FloatTypes);
+
+ for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+ MVT::SimpleValueType VT = FloatTypes[x];
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FSUB, VT, Expand);
+ }
}
//===----------------------------------------------------------------------===//
@@ -129,6 +198,18 @@ MVT AMDGPUTargetLowering::getVectorIdxTy() const {
return MVT::i32;
}
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
+ EVT CastTy) const {
+ if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
+ return true;
+
+ unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
+ unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
+
+ return ((LScalarSize <= CastScalarSize) ||
+ (CastScalarSize >= 32) ||
+ (LScalarSize < 32));
+}
//===---------------------------------------------------------------------===//
// Target Properties
@@ -182,8 +263,12 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
// AMDGPU DAG lowering
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
}
return Op;
}
@@ -194,18 +279,82 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
const DataLayout *TD = getTargetMachine().getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
+
+ assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
- unsigned Offset = MFI->LDSSize;
const GlobalValue *GV = G->getGlobal();
- uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
- // XXX: Account for alignment?
- MFI->LDSSize += Size;
+ unsigned Offset;
+ if (MFI->LocalMemoryObjects.count(GV) == 0) {
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ Offset = MFI->LDSSize;
+ MFI->LocalMemoryObjects[GV] = Offset;
+ // XXX: Account for alignment?
+ MFI->LDSSize += Size;
+ } else {
+ Offset = MFI->LocalMemoryObjects[GV];
+ }
+
+ return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
+}
+
+void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start,
+ unsigned Count) const {
+ EVT VT = Op.getValueType();
+ for (unsigned i = Start, e = Start + Count; i != e; ++i) {
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
+ VT.getVectorElementType(),
+ Op, DAG.getConstant(i, MVT::i32)));
+ }
+}
+
+SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SmallVector<SDValue, 8> Args;
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ ExtractVectorElements(A, DAG, Args, 0,
+ A.getValueType().getVectorNumElements());
+ ExtractVectorElements(B, DAG, Args, 0,
+ B.getValueType().getVectorNumElements());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
+ &Args[0], Args.size());
+}
+
+SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ SmallVector<SDValue, 8> Args;
+ EVT VT = Op.getValueType();
+ unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
+ VT.getVectorNumElements());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
+ &Args[0], Args.size());
+}
+
+SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
+ SelectionDAG &DAG) const {
- return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+ assert(FIN);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
+ Op.getValueType());
}
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -335,7 +484,122 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
return Op;
}
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+ EVT EltVT = Op.getValueType().getVectorElementType();
+ EVT PtrVT = Load->getBasePtr().getValueType();
+ unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
+ SmallVector<SDValue, 8> Loads;
+ SDLoc SL(Op);
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
+ DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
+ Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+ Load->getChain(), Ptr,
+ MachinePointerInfo(Load->getMemOperand()->getValue()),
+ MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->getAlignment()));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
+ Loads.size());
+}
+
+SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+ EVT MemVT = Store->getMemoryVT();
+ unsigned MemBits = MemVT.getSizeInBits();
+
+ // Byte stores are really expensive, so if possible, try to pack a
+ // 32-bit vector truncating store into an i32 store.
+ // XXX: We could also optimize other vector bitwidths.
+ if (!MemVT.isVector() || MemBits > 32) {
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ const SDValue &Value = Store->getValue();
+ EVT VT = Value.getValueType();
+ const SDValue &Ptr = Store->getBasePtr();
+ EVT MemEltVT = MemVT.getVectorElementType();
+ unsigned MemEltBits = MemEltVT.getSizeInBits();
+ unsigned MemNumElements = MemVT.getVectorNumElements();
+ EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ SDValue Mask;
+ switch(MemEltBits) {
+ case 8:
+ Mask = DAG.getConstant(0xFF, PackedVT);
+ break;
+ case 16:
+ Mask = DAG.getConstant(0xFFFF, PackedVT);
+ break;
+ default:
+ llvm_unreachable("Cannot lower this vector store");
+ }
+ SDValue PackedValue;
+ for (unsigned i = 0; i < MemNumElements; ++i) {
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
+ DAG.getConstant(i, MVT::i32));
+ Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
+ Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
+ SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
+ Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
+ if (i == 0) {
+ PackedValue = Elt;
+ } else {
+ PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
+ }
+ }
+ return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment());
+}
+
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
+ EVT EltVT = Store->getValue().getValueType().getVectorElementType();
+ EVT PtrVT = Store->getBasePtr().getValueType();
+ unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
+ SDLoc SL(Op);
+
+ SmallVector<SDValue, 8> Chains;
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Store->getValue(), DAG.getConstant(i, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
+ Store->getBasePtr(),
+ DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
+ PtrVT));
+ Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment()));
+ }
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
+}
+SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
+
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
+ Store->getValue().getValueType().isVector()) {
+ return SplitVectorStore(Op, DAG);
+ }
+ return SDValue();
+}
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
@@ -392,13 +656,13 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
- ISD::SETGE);
- // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
- SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
- DAG.getConstant(0, VT),
+ ISD::SETUGE);
+ // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
+ Num_S_Remainder,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
- ISD::SETGE);
+ ISD::SETUGE);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
Remainder_GE_Zero);
@@ -442,10 +706,62 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, 2, DL);
}
+SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue S0 = Op.getOperand(0);
+ SDLoc DL(Op);
+ if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
+ return SDValue();
+
+ // f32 uint_to_fp i64
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(0, MVT::i32));
+ SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(1, MVT::i32));
+ SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
+ DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
+ return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
+
+}
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::getOriginalFunctionArgs(
+ SelectionDAG &DAG,
+ const Function *F,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<ISD::InputArg> &OrigIns) const {
+
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
+ if (Ins[i].ArgVT == Ins[i].VT) {
+ OrigIns.push_back(Ins[i]);
+ continue;
+ }
+
+ EVT VT;
+ if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
+ // Vector has been split into scalars.
+ VT = Ins[i].ArgVT.getVectorElementType();
+ } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
+ Ins[i].ArgVT.getVectorElementType() !=
+ Ins[i].VT.getVectorElementType()) {
+ // Vector elements have been promoted
+ VT = Ins[i].ArgVT;
+ } else {
+ // Vector has been split into smaller vectors.
+ VT = Ins[i].VT;
+ }
+
+ ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
+ Ins[i].OrigArgIndex, Ins[i].PartOffset);
+ OrigIns.push_back(Arg);
+ }
+}
+
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->isExactlyValue(1.0);
@@ -507,5 +823,13 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
+ NODE_NAME_CASE(LOAD_CONSTANT)
+ NODE_NAME_CASE(LOAD_INPUT)
+ NODE_NAME_CASE(SAMPLE)
+ NODE_NAME_CASE(SAMPLEB)
+ NODE_NAME_CASE(SAMPLED)
+ NODE_NAME_CASE(SAMPLEL)
+ NODE_NAME_CASE(STORE_MSKOR)
+ NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}
}
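
LowerUINT_TO_FP above builds the f32 result as float(hi32) * 2^32 + float(lo32). A minimal scalar model of that arithmetic (editorial illustration; uint64ToFloat is only a stand-in for the emitted DAG):

    #include <cassert>
    #include <cstdint>

    // Scalar model of the i64 -> f32 lowering: split into 32-bit halves,
    // convert each, and scale the high half by 2^32 before adding.
    static float uint64ToFloat(uint64_t X) {
      uint32_t Lo = static_cast<uint32_t>(X);
      uint32_t Hi = static_cast<uint32_t>(X >> 32);
      return static_cast<float>(Hi) * 4294967296.0f // 2^32
           + static_cast<float>(Lo);
    }

    int main() {
      assert(uint64ToFloat(0x100000000ull) == 4294967296.0f);
      assert(uint64ToFloat(42) == 42.0f);
      return 0;
    }
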
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index f614e23..2dfd3cf 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -25,8 +25,20 @@ class MachineRegisterInfo;
class AMDGPUTargetLowering : public TargetLowering {
private:
+ void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start, unsigned Count) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Lower vector stores by merging the vector elements into an integer
+ /// of the same bitwidth.
+ SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
+ /// \brief Split a vector store into multiple scalar stores.
+ /// \returns The resulting chain.
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
protected:
@@ -39,10 +51,23 @@ protected:
unsigned Reg, EVT VT) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
-
+ /// \brief Split a vector load into multiple scalar loads.
+ SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
+ SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
+ /// The SelectionDAGBuilder will automatically promote function arguments
+ /// with illegal types. However, this does not work for the AMDGPU targets
+ /// since the function arguments are stored in memory as these illegal types.
+ /// In order to handle this properly we need to get the original type sizes
+ /// from the LLVM IR Function and fix up the ISD::InputArg values before
+ /// passing them to AnalyzeFormalArguments().
+ void getOriginalFunctionArgs(SelectionDAG &DAG,
+ const Function *F,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<ISD::InputArg> &OrigIns) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
@@ -52,6 +77,7 @@ public:
virtual bool isFAbsFree(EVT VT) const;
virtual bool isFNegFree(EVT VT) const;
virtual MVT getVectorIdxTy() const;
+ virtual bool isLoadBitCastBeneficial(EVT, EVT) const LLVM_OVERRIDE;
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -139,6 +165,15 @@ enum {
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
+ LOAD_INPUT,
+ SAMPLE,
+ SAMPLEB,
+ SAMPLED,
+ SAMPLEL,
+ FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ STORE_MSKOR,
+ LOAD_CONSTANT,
+ TBUFFER_STORE_FORMAT,
LAST_AMDGPU_ISD_NUMBER
};
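
MergeVectorStore, declared here and defined earlier in the patch, replaces a narrow truncating vector store with a single i32 store by masking, shifting, and OR-ing the elements together. A minimal scalar sketch of the packing for a hypothetical v4i8 value (editorial illustration; packV4I8 is not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Pack four 8-bit elements into one 32-bit word, element 0 in the lowest
    // byte, mirroring the shift/OR loop in MergeVectorStore.
    static uint32_t packV4I8(const uint8_t Elts[4]) {
      uint32_t Packed = 0;
      for (unsigned i = 0; i < 4; ++i) {
        uint32_t Elt = Elts[i] & 0xFF; // mask to the element width
        Packed |= Elt << (8 * i);      // shift into its byte lane
      }
      return Packed;
    }

    int main() {
      const uint8_t V[4] = {0x01, 0x02, 0x03, 0x04};
      assert(packV4I8(V) == 0x04030201u); // one i32 store instead of four i8 stores
      return 0;
    }
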
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
deleted file mode 100644
index 3ce3ecf..0000000
--- a/lib/Target/R600/AMDGPUIndirectAddressing.cpp
+++ /dev/null
@@ -1,345 +0,0 @@
-//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Instructions can use indirect addressing to index the register file as if it
-/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
-/// to either a COPY or a MOV that uses indirect addressing.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
-
-private:
- static char ID;
- const AMDGPUInstrInfo *TII;
-
- bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
-
-public:
- AMDGPUIndirectAddressingPass(TargetMachine &tm) :
- MachineFunctionPass(ID),
- TII(0)
- { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "R600 Handle indirect addressing"; }
-
-};
-
-} // End anonymous namespace
-
-char AMDGPUIndirectAddressingPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
- return new AMDGPUIndirectAddressingPass(tm);
-}
-
-bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- TII = static_cast<const AMDGPUInstrInfo*>(MF.getTarget().getInstrInfo());
-
- int IndirectBegin = TII->getIndirectIndexBegin(MF);
- int IndirectEnd = TII->getIndirectIndexEnd(MF);
-
- if (IndirectBegin == -1) {
- // No indirect addressing, we can skip this pass
- assert(IndirectEnd == -1);
- return false;
- }
-
- // The map keeps track of the indirect address that is represented by
- // each virtual register. The key is the register and the value is the
- // indirect address it uses.
- std::map<unsigned, unsigned> RegisterAddressMap;
-
- // First pass - Lower all of the RegisterStore instructions and track which
- // registers are live.
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- // This map keeps track of the current live indirect registers.
- // The key is the address and the value is the register
- std::map<unsigned, unsigned> LiveAddressRegisterMap;
- MachineBasicBlock &MBB = *BB;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next) {
- Next = llvm::next(I);
- MachineInstr &MI = *I;
-
- if (!TII->isRegisterStore(MI)) {
- continue;
- }
-
- // Lower RegisterStore
-
- unsigned RegIndex = MI.getOperand(2).getImm();
- unsigned Channel = MI.getOperand(3).getImm();
- unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
- const TargetRegisterClass *IndirectStoreRegClass =
- TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
-
- if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
- // Direct register access.
- unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
-
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
- .addOperand(MI.getOperand(0));
-
- RegisterAddressMap[DstReg] = Address;
- LiveAddressRegisterMap[Address] = DstReg;
- } else {
- // Indirect register access.
- MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
- MI.getOperand(0).getReg(), // Value
- Address,
- MI.getOperand(1).getReg()); // Offset
- for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
- unsigned Addr = TII->calculateIndirectAddress(i, Channel);
- unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
- MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
- RegisterAddressMap[DstReg] = Addr;
- LiveAddressRegisterMap[Addr] = DstReg;
- }
- }
- MI.eraseFromParent();
- }
-
- // Update the live-ins of the succesor blocks
- for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
- SuccEnd = MBB.succ_end();
- SuccEnd != Succ; ++Succ) {
- std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
- for (Key = LiveAddressRegisterMap.begin(),
- KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
- (*Succ)->addLiveIn(Key->second);
- }
- }
- }
-
- // Second pass - Lower the RegisterLoad instructions
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- // Key is the address and the value is the register
- std::map<unsigned, unsigned> LiveAddressRegisterMap;
- MachineBasicBlock &MBB = *BB;
-
- MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
- while (LI != MBB.livein_end()) {
- std::vector<unsigned> PhiRegisters;
-
- // Make sure this live in is used for indirect addressing
- if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
- ++LI;
- continue;
- }
-
- unsigned Address = RegisterAddressMap[*LI];
- LiveAddressRegisterMap[Address] = *LI;
- PhiRegisters.push_back(*LI);
-
- // Check if there are other live in registers which map to the same
- // indirect address.
- for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
- LE = MBB.livein_end();
- LJ != LE; ++LJ) {
- unsigned Reg = *LJ;
- if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
- continue;
- }
-
- if (RegisterAddressMap[Reg] == Address) {
- PhiRegisters.push_back(Reg);
- }
- }
-
- if (PhiRegisters.size() == 1) {
- // We don't need to insert a Phi instruction, so we can just add the
- // registers to the live list for the block.
- LiveAddressRegisterMap[Address] = *LI;
- MBB.removeLiveIn(*LI);
- } else {
- // We need to insert a PHI, because we have the same address being
- // written in multiple predecessor blocks.
- const TargetRegisterClass *PhiDstClass =
- TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
- unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
- MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
- MBB.findDebugLoc(MBB.begin()),
- TII->get(AMDGPU::PHI), PhiDstReg);
-
- for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
- RE = PhiRegisters.end();
- RI != RE; ++RI) {
- unsigned Reg = *RI;
- MachineInstr *DefInst = MRI.getVRegDef(Reg);
- assert(DefInst);
- MachineBasicBlock *RegBlock = DefInst->getParent();
- Phi.addReg(Reg);
- Phi.addMBB(RegBlock);
- MBB.removeLiveIn(Reg);
- }
- RegisterAddressMap[PhiDstReg] = Address;
- LiveAddressRegisterMap[Address] = PhiDstReg;
- }
- LI = MBB.livein_begin();
- }
-
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next) {
- Next = llvm::next(I);
- MachineInstr &MI = *I;
-
- if (!TII->isRegisterLoad(MI)) {
- if (MI.getOpcode() == AMDGPU::PHI) {
- continue;
- }
- // Check for indirect register defs
- for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
- OpIdx < NumOperands; ++OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- if (MO.isReg() && MO.isDef() &&
- RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
- unsigned Reg = MO.getReg();
- unsigned LiveAddress = RegisterAddressMap[Reg];
- // Chain the live-ins
- if (LiveAddressRegisterMap.find(LiveAddress) !=
- LiveAddressRegisterMap.end()) {
- MI.addOperand(MachineOperand::CreateReg(
- LiveAddressRegisterMap[LiveAddress],
- false, // isDef
- true, // isImp
- true)); // isKill
- }
- LiveAddressRegisterMap[LiveAddress] = Reg;
- }
- }
- continue;
- }
-
- const TargetRegisterClass *SuperIndirectRegClass =
- TII->getSuperIndirectRegClass();
- const TargetRegisterClass *IndirectLoadRegClass =
- TII->getIndirectAddrLoadRegClass();
- unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
-
- unsigned RegIndex = MI.getOperand(2).getImm();
- unsigned Channel = MI.getOperand(3).getImm();
- unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
-
- if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
- // Direct register access
- unsigned Reg = LiveAddressRegisterMap[Address];
- unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
-
- if (regHasExplicitDef(MRI, Reg)) {
- // If the register we are reading from has an explicit def, then that
- // means it was written via a direct register access (i.e. COPY
- // or other instruction that doesn't use indirect addressing). In
- // this case we know where the value has been stored, so we can just
- // issue a copy.
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
- MI.getOperand(0).getReg())
- .addReg(Reg);
- } else {
- // If the register we are reading has an implicit def, then that
- // means it was written by an indirect register access (i.e. An
- // instruction that uses indirect addressing.
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
- MI.getOperand(0).getReg())
- .addReg(AddrReg)
- .addReg(Reg, RegState::Implicit);
- }
- } else {
- // Indirect register access
-
- // Note on REQ_SEQUENCE instructons: You can't actually use the register
- // it defines unless you have an instruction that takes the defined
- // register class as an operand.
-
- MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
- TII->get(AMDGPU::REG_SEQUENCE),
- IndirectReg);
- for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
- unsigned Addr = TII->calculateIndirectAddress(i, Channel);
- if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
- continue;
- }
- unsigned Reg = LiveAddressRegisterMap[Addr];
-
- // We only need to use REG_SEQUENCE for explicit defs, since the
- // register coalescer won't do anything with the implicit defs.
- if (!regHasExplicitDef(MRI, Reg)) {
- continue;
- }
-
- // Insert a REQ_SEQUENCE instruction to force the register allocator
- // to allocate the virtual register to the correct physical register.
- Sequence.addReg(LiveAddressRegisterMap[Addr]);
- Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
- }
- MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
- MI.getOperand(0).getReg(), // Value
- Address,
- MI.getOperand(1).getReg()); // Offset
-
-
-
- Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
- Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
-
- }
- MI.eraseFromParent();
- }
- }
- return false;
-}
-
-bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
- unsigned Reg) const {
- MachineInstr *DefInstr = MRI.getVRegDef(Reg);
-
- if (!DefInstr) {
- return false;
- }
-
- if (DefInstr->getOpcode() == AMDGPU::PHI) {
- bool Explicit = false;
- for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
- E = DefInstr->operands_end();
- I != E; ++I) {
- const MachineOperand &MO = *I;
- if (!MO.isReg() || MO.isDef()) {
- continue;
- }
-
- Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
- }
- return Explicit;
- }
-
- return DefInstr->getOperand(0).isReg() &&
- DefInstr->getOperand(0).getReg() == Reg;
-}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index 61437e9..4f7084b 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -20,15 +20,19 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
using namespace llvm;
+
+// Pin the vtable to this file.
+void AMDGPUInstrInfo::anchor() {}
+
AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
- : AMDGPUGenInstrInfo(0,0), RI(tm), TM(tm) { }
+ : AMDGPUGenInstrInfo(-1,-1), RI(tm), TM(tm) { }
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -118,6 +122,55 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
assert(!"Not Implemented");
}
+bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ int OffsetOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::addr);
+ // addr is a custom operand with multiple MI operands, and only the
+ // first MI operand is given a name.
+ int RegOpIdx = OffsetOpIdx + 1;
+ int ChanOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::chan);
+
+ if (isRegisterLoad(*MI)) {
+ int DstOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+ unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
+ unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+ unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
+ if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+ buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
+ getIndirectAddrRegClass()->getRegister(Address));
+ } else {
+ buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
+ Address, OffsetReg);
+ }
+ } else if (isRegisterStore(*MI)) {
+ int ValOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::val);
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+ unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
+ unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+ unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
+ if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+ buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
+ MI->getOperand(ValOpIdx).getReg());
+ } else {
+ buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
+ calculateIndirectAddress(RegIndex, Channel),
+ OffsetReg);
+ }
+ } else {
+ return false;
+ }
+
+ MBB->erase(MI);
+ return true;
+}
+
+
MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
@@ -223,6 +276,57 @@ bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
}
+int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = -1;
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ if (MRI.livein_empty()) {
+ return 0;
+ }
+
+ const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ LE = MRI.livein_end();
+ LI != LE; ++LI) {
+ unsigned Reg = LI->first;
+ if (TargetRegisterInfo::isVirtualRegister(Reg) ||
+ !IndirectRC->contains(Reg))
+ continue;
+
+ unsigned RegIndex;
+ unsigned RegEnd;
+ for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
+ ++RegIndex) {
+ if (IndirectRC->getRegister(RegIndex) == Reg)
+ break;
+ }
+ Offset = std::max(Offset, (int)RegIndex);
+ }
+
+ return Offset + 1;
+}
+
+int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ int Offset = 0;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Variable sized objects are not supported
+ assert(!MFI->hasVarSizedObjects());
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+ return getIndirectIndexBegin(MF) + Offset;
+}
+
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const {
@@ -244,3 +348,12 @@ void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
}
}
}
+
+int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
+ switch (Channels) {
+ default: return Opcode;
+ case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
+ case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
+ case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
+ }
+}
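
The getMaskedMIMGOp hunk just above only adds the mapping hook; how a caller would use it is not part of this patch. A minimal, hypothetical sketch (the caller name, the write-mask popcount, and the include path are assumptions for illustration only):

#include "AMDGPUInstrInfo.h"  // assumed include within the R600 target
#include <cstdint>

// Sketch only: pick the MIMG variant that writes just the channels named by a
// 4-bit write mask. getMaskedMIMGOp returns the original opcode when the
// channel count has no narrower variant (e.g. 0 or 4 channels).
static int pickMaskedMIMGOp(const llvm::AMDGPUInstrInfo &TII,
                            uint16_t Opcode, unsigned WriteMask) {
  unsigned Channels = 0;
  for (unsigned Mask = WriteMask & 0xF; Mask; Mask >>= 1)
    Channels += Mask & 1;  // population count of the write mask
  return TII.getMaskedMIMGOp(Opcode, Channels);
}
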
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 306f467..ce5b58c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -43,6 +43,7 @@ private:
const AMDGPURegisterInfo RI;
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const;
+ virtual void anchor();
protected:
TargetMachine &TM;
public:
@@ -87,6 +88,8 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
@@ -97,6 +100,14 @@ protected:
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const;
+ /// \returns the smallest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ /// \returns the largest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
public:
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
@@ -139,19 +150,9 @@ public:
  // Pure virtual functions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//
- virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const = 0;
virtual unsigned getIEQOpcode() const = 0;
virtual bool isMov(unsigned opcode) const = 0;
- /// \returns the smallest register index that will be accessed by an indirect
- /// read or write or -1 if indirect addressing is not used by this program.
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
-
- /// \returns the largest register index that will be accessed by an indirect
- /// read or write or -1 if indirect addressing is not used by this program.
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
-
/// \brief Calculate the "Indirect Address" for the given \p RegIndex and
/// \p Channel
///
@@ -162,14 +163,9 @@ public:
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const = 0;
- /// \returns The register class to be used for storing values to an
- /// "Indirect Address" .
- virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
- unsigned SourceReg) const = 0;
-
- /// \returns The register class to be used for loading values from
- /// an "Indirect Address" .
- virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+  /// \returns The register class to be used for loading values from and
+  /// storing values to an "Indirect Address".
+ virtual const TargetRegisterClass *getIndirectAddrRegClass() const = 0;
/// \brief Build instruction(s) for an indirect register write.
///
@@ -187,16 +183,21 @@ public:
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const = 0;
- /// \returns the register class whose sub registers are the set of all
- /// possible registers that can be used for indirect addressing.
- virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
-
/// \brief Convert the AMDIL MachineInstr to a supported ISA
/// MachineInstr
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const;
+ /// \brief Build a MOV instruction.
+ virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const = 0;
+
+ /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
+  /// equivalent opcode that writes \p Channels channels.
+ int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
+
};
namespace AMDGPU {
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 48d89dd..fccede0 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -72,3 +72,17 @@ def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
[SDNPHasChain, SDNPMayStore]>;
+
+// MSKOR instructions are atomic memory instructions used mainly for storing
+// 8-bit and 16-bit values. The definition is:
+//
+// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
+//
+// src0: vec4(src, 0, 0, mask)
+// src1: dst - rat offset (aka pointer) in dwords
+def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
+ SDTypeProfile<0, 2, []>,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def AMDGPUround : SDNode<"ISD::FROUND",
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
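
A scalar C++ model of the MSKOR operation described in the comment above may make the semantics concrete. This is an illustration only (the helper names and the byte-store wrapper are assumptions), not code from the backend:

#include <cstdint>

// MEM[dst] = ((MEM[dst] & ~mask) | src), applied to one 32-bit dword.
static uint32_t mskor(uint32_t DstDword, uint32_t Mask, uint32_t Src) {
  return (DstDword & ~Mask) | Src;
}

// An 8-bit store of Value at byte offset ByteOff inside the dword is then
// src = Value << (8 * ByteOff) with mask = 0xFF << (8 * ByteOff).
static uint32_t storeByte(uint32_t DstDword, uint8_t Value, unsigned ByteOff) {
  return mskor(DstDword, 0xFFu << (8 * ByteOff),
               uint32_t(Value) << (8 * ByteOff));
}
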
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index d6a7759..3c5375d 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -35,46 +35,75 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
}
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
-def COND_EQ : PatLeaf <
+//===----------------------------------------------------------------------===//
+// PatLeafs for floating-point comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_OEQ : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOEQ: case ISD::SETUEQ:
- case ISD::SETEQ: return true;}}}]
+ [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;
-def COND_NE : PatLeaf <
+def COND_OGT : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
+>;
+
+def COND_OGE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
+>;
+
+def COND_OLT : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETONE: case ISD::SETUNE:
- case ISD::SETNE: return true;}}}]
+ [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;
-def COND_GT : PatLeaf <
+
+def COND_OLE : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOGT: case ISD::SETUGT:
- case ISD::SETGT: return true;}}}]
+ [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;
-def COND_GE : PatLeaf <
+def COND_UNE : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOGE: case ISD::SETUGE:
- case ISD::SETGE: return true;}}}]
+ [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;
-def COND_LT : PatLeaf <
+def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
+def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for unsigned comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
+def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
+def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
+def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for signed comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
+def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
+def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
+def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for integer equality
+//===----------------------------------------------------------------------===//
+
+def COND_EQ : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOLT: case ISD::SETULT:
- case ISD::SETLT: return true;}}}]
+ [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;
-def COND_LE : PatLeaf <
+def COND_NE : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOLE: case ISD::SETULE:
- case ISD::SETLE: return true;}}}]
+ [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;
def COND_NULL : PatLeaf <
@@ -96,6 +125,10 @@ def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
+def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
@@ -108,8 +141,12 @@ def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
-def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
@@ -132,6 +169,14 @@ def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
+def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
@@ -146,20 +191,55 @@ def az_extloadi32_constant : PatFrag<(ops node:$ptr),
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
-def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isLocalLoad(dyn_cast<LoadSDNode>(N));
+def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
def local_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
- return isLocalStore(dyn_cast<StoreSDNode>(N));
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
+ (atomic_load_add node:$ptr, node:$value), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
+ (atomic_load_sub node:$ptr, node:$value), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+def mskor_global : PatFrag<(ops node:$val, node:$ptr),
+ (AMDGPUstore_mskor node:$val, node:$ptr), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
-int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
+int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
}
def CONST : Constants;
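
As a side note on FP_UINT_MAX_PLUS_1 above: 0x4f800000 is the IEEE-754 single-precision encoding of 2^32 (sign 0, exponent 127 + 32, zero mantissa), which is what the "1 << 32" comment refers to. A standalone check, for illustration only and not part of the patch:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint32_t Bits = 0x4f800000u;  // exponent field 159 = 127 + 32
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  assert(F == 4294967296.0f);         // 2^32, i.e. UINT_MAX + 1 as a float
  return 0;
}
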
@@ -205,6 +285,8 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst <
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
ComplexPattern addrPat> {
+let UseNamedOperandTable = 1 in {
+
def RegisterLoad : AMDGPUShaderInst <
(outs dstClass:$dst),
(ins addrClass:$addr, i32imm:$chan),
@@ -223,6 +305,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
let isRegisterStore = 1;
}
}
+}
} // End isCodeGenOnly = 1, isPseudo = 1
@@ -254,61 +337,12 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
(INSERT_SUBREG $vec, $elem, sub_reg)
>;
-// Vector Build pattern
-class Vector1_Build <ValueType vecType, ValueType elemType,
- RegisterClass rc> : Pat <
- (vecType (build_vector elemType:$src)),
- (vecType (COPY_TO_REGCLASS $src, rc))
->;
-
-class Vector2_Build <ValueType vecType, ValueType elemType> : Pat <
- (vecType (build_vector elemType:$sub0, elemType:$sub1)),
- (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1)
->;
-
class Vector4_Build <ValueType vecType, ValueType elemType> : Pat <
(vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
(vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3)
>;
-class Vector8_Build <ValueType vecType, ValueType elemType> : Pat <
- (vecType (build_vector elemType:$sub0, elemType:$sub1,
- elemType:$sub2, elemType:$sub3,
- elemType:$sub4, elemType:$sub5,
- elemType:$sub6, elemType:$sub7)),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
- $sub2, sub2), $sub3, sub3),
- $sub4, sub4), $sub5, sub5),
- $sub6, sub6), $sub7, sub7)
->;
-
-class Vector16_Build <ValueType vecType, ValueType elemType> : Pat <
- (vecType (build_vector elemType:$sub0, elemType:$sub1,
- elemType:$sub2, elemType:$sub3,
- elemType:$sub4, elemType:$sub5,
- elemType:$sub6, elemType:$sub7,
- elemType:$sub8, elemType:$sub9,
- elemType:$sub10, elemType:$sub11,
- elemType:$sub12, elemType:$sub13,
- elemType:$sub14, elemType:$sub15)),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
- $sub2, sub2), $sub3, sub3),
- $sub4, sub4), $sub5, sub5),
- $sub6, sub6), $sub7, sub7),
- $sub8, sub8), $sub9, sub9),
- $sub10, sub10), $sub11, sub11),
- $sub12, sub12), $sub13, sub13),
- $sub14, sub14), $sub15, sub15)
->;
-
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index 1dc1c65..0ed598e 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -15,14 +15,19 @@
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include <algorithm>
using namespace llvm;
@@ -69,15 +74,45 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MachineBasicBlock::const_instr_iterator I = MI;
++I;
while (I != MBB->end() && I->isInsideBundle()) {
- MCInst MCBundleInst;
- const MachineInstr *BundledInst = I;
- MCInstLowering.lower(BundledInst, MCBundleInst);
- OutStreamer.EmitInstruction(MCBundleInst);
+ EmitInstruction(I);
++I;
}
} else {
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
+
+ if (DisasmEnabled) {
+ // Disassemble instruction/operands to text.
+ DisasmLines.resize(DisasmLines.size() + 1);
+ std::string &DisasmLine = DisasmLines.back();
+ raw_string_ostream DisasmStream(DisasmLine);
+
+ AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *TM.getInstrInfo(),
+ *TM.getRegisterInfo());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef());
+
+ // Disassemble instruction/operands to hex representation.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallVector<char, 16> CodeBytes;
+ raw_svector_ostream CodeStream(CodeBytes);
+
+ MCObjectStreamer &ObjStreamer = (MCObjectStreamer &)OutStreamer;
+ MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
+ InstEmitter.EncodeInstruction(TmpInst, CodeStream, Fixups);
+ CodeStream.flush();
+
+ HexLines.resize(HexLines.size() + 1);
+ std::string &HexLine = HexLines.back();
+ raw_string_ostream HexStream(HexLine);
+
+ for (size_t i = 0; i < CodeBytes.size(); i += 4) {
+ unsigned int CodeDWord = *(unsigned int *)&CodeBytes[i];
+ HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
+ }
+
+ DisasmStream.flush();
+ DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
+ }
}
}
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
index f2342b0..14171f4 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -6,6 +6,9 @@ using namespace llvm;
static const char *const ShaderTypeAttribute = "ShaderType";
+// Pin the vtable to this file.
+void AMDGPUMachineFunction::anchor() {}
+
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo() {
ShaderType = ShaderType::COMPUTE;
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
index 789b96a..fea0b39 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.h
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -14,13 +14,18 @@
#define AMDGPUMACHINEFUNCTION_H
#include "llvm/CodeGen/MachineFunction.h"
+#include <map>
namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
+ virtual void anchor();
public:
AMDGPUMachineFunction(const MachineFunction &MF);
unsigned ShaderType;
+ /// A map to keep track of local memory objects and their offsets within
+ /// the local memory space.
+ std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
};
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 3402092..47617a7 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -46,27 +46,21 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return 0;
}
+unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
+ static const unsigned SubRegs[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
+ AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
+ AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
+ AMDGPU::sub15
+ };
+
+ assert (Channel < array_lengthof(SubRegs));
+ return SubRegs[Channel];
+}
+
unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
- switch(IndirectIndex) {
- case 0: return AMDGPU::sub0;
- case 1: return AMDGPU::sub1;
- case 2: return AMDGPU::sub2;
- case 3: return AMDGPU::sub3;
- case 4: return AMDGPU::sub4;
- case 5: return AMDGPU::sub5;
- case 6: return AMDGPU::sub6;
- case 7: return AMDGPU::sub7;
- case 8: return AMDGPU::sub8;
- case 9: return AMDGPU::sub9;
- case 10: return AMDGPU::sub10;
- case 11: return AMDGPU::sub11;
- case 12: return AMDGPU::sub12;
- case 13: return AMDGPU::sub13;
- case 14: return AMDGPU::sub14;
- case 15: return AMDGPU::sub15;
- default: llvm_unreachable("indirect index out of range");
- }
+ return getSubRegFromChannel(IndirectIndex);
}
#define GET_REGINFO_TARGET_DESC
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index 7cbd34b..688e1a0 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -50,6 +50,14 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
assert(!"Unimplemented"); return NULL;
}
+ virtual unsigned getHWRegIndex(unsigned Reg) const {
+ assert(!"Unimplemented"); return 0;
+ }
+
+ /// \returns the sub reg enum value for the given \p Channel
+ /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
+ unsigned getSubRegFromChannel(unsigned Channel) const;
+
const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 8ed5a74..061793a 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
-#include <stdio.h>
using namespace llvm;
@@ -37,6 +36,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
Gen = AMDGPUSubtarget::R600;
FP64 = false;
CaymanISA = false;
+ EnableIRStructurizer = true;
+ EnableIfCvt = true;
ParseSubtargetFeatures(GPU, FS);
DevName = GPU;
}
@@ -66,6 +67,14 @@ AMDGPUSubtarget::hasCaymanISA() const {
return CaymanISA;
}
bool
+AMDGPUSubtarget::IsIRStructurizerEnabled() const {
+ return EnableIRStructurizer;
+}
+bool
+AMDGPUSubtarget::isIfCvtEnabled() const {
+ return EnableIfCvt;
+}
+bool
AMDGPUSubtarget::isTargetELF() const {
return false;
}
@@ -98,6 +107,10 @@ AMDGPUSubtarget::getDataLayout() const {
DataLayout.append("-p:32:32:32");
}
+ if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ DataLayout.append("-p3:32:32:32");
+ }
+
return DataLayout;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 8c65096..4288d27 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -33,7 +33,8 @@ public:
R700,
EVERGREEN,
NORTHERN_ISLANDS,
- SOUTHERN_ISLANDS
+ SOUTHERN_ISLANDS,
+ SEA_ISLANDS
};
private:
@@ -48,6 +49,8 @@ private:
enum Generation Gen;
bool FP64;
bool CaymanISA;
+ bool EnableIRStructurizer;
+ bool EnableIfCvt;
InstrItineraryData InstrItins;
@@ -63,6 +66,12 @@ public:
enum Generation getGeneration() const;
bool hasHWFP64() const;
bool hasCaymanISA() const;
+ bool IsIRStructurizerEnabled() const;
+ bool isIfCvtEnabled() const;
+
+ virtual bool enableMachineScheduler() const {
+ return getGeneration() <= NORTHERN_ISLANDS;
+ }
// Helper functions to simplify if statements
bool isTargetELF() const;
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 5ebc5f2..bc4f5d7 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -33,6 +33,7 @@
#include "llvm/Transforms/Scalar.h"
#include <llvm/CodeGen/Passes.h>
+
using namespace llvm;
extern "C" void LLVMInitializeR600Target() {
@@ -58,8 +59,9 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
Subtarget(TT, CPU, FS),
Layout(Subtarget.getDataLayout()),
- FrameLowering(TargetFrameLowering::StackGrowsUp, 16 // Stack Alignment
- , 0),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ 64 * 16 // Maximum stack alignment (long16)
+ , 0),
IntrinsicInfo(this),
InstrItins(&Subtarget.getInstrItineraryData()) {
// TLInfo uses InstrInfo so it must be initialized after.
@@ -80,17 +82,20 @@ namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- enablePass(&MachineSchedulerID);
- MachineSchedRegistry::setDefault(createR600MachineScheduler);
- }
- }
+ : TargetPassConfig(TM, PM) {}
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();
}
+
+ virtual ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ return createR600MachineScheduler(C);
+ return 0;
+ }
+
virtual bool addPreISel();
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
@@ -120,8 +125,11 @@ bool
AMDGPUPassConfig::addPreISel() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
addPass(createFlattenCFGPass());
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (ST.IsIRStructurizerEnabled())
addPass(createStructurizeCFGPass());
+ if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ addPass(createSinkingPass());
+ addPass(createSITypeRewriter());
addPass(createSIAnnotateControlFlowPass());
} else {
addPass(createR600TextureIntrinsicsReplacer());
@@ -131,12 +139,6 @@ AMDGPUPassConfig::addPreISel() {
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
-
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- // This callbacks this pass uses are not implemented yet on SI.
- addPass(createAMDGPUIndirectAddressingPass(*TM));
- }
return false;
}
@@ -164,10 +166,12 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
bool AMDGPUPassConfig::addPreSched2() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
addPass(createR600EmitClauseMarkers(*TM));
- }
- addPass(&IfConverterID);
+ if (ST.isIfCvtEnabled())
+ addPass(&IfConverterID);
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ addPass(createR600ClauseMergePass(*TM));
return false;
}
@@ -185,4 +189,3 @@ bool AMDGPUPassConfig::addPreEmitPass() {
return false;
}
-
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index 687eadb..507570f 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -1005,13 +1005,14 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
return 0;
assert(isCondBranch(BranchMI));
+ int NumMatch = 0;
MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
- serialPatternMatch(TrueMBB);
- ifPatternMatch(TrueMBB);
+ NumMatch += serialPatternMatch(TrueMBB);
+ NumMatch += ifPatternMatch(TrueMBB);
MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
- serialPatternMatch(FalseMBB);
- ifPatternMatch(FalseMBB);
+ NumMatch += serialPatternMatch(FalseMBB);
+ NumMatch += ifPatternMatch(FalseMBB);
MachineBasicBlock *LandBlk;
int Cloned = 0;
@@ -1040,7 +1041,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
&& isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
LandBlk = *TrueMBB->succ_begin();
} else {
- return handleJumpintoIf(MBB, TrueMBB, FalseMBB);
+ return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
}
  // improveSimpleJumpintoIf can handle the case where landBlk == NULL but the
@@ -1068,7 +1069,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
numClonedBlock += Cloned;
- return 1 + Cloned;
+ return 1 + Cloned + NumMatch;
}
int AMDGPUCFGStructurizer::loopendPatternMatch() {
@@ -1233,7 +1234,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
numClonedBlock += Num;
Num += serialPatternMatch(*HeadMBB->succ_begin());
- Num += serialPatternMatch(*(++HeadMBB->succ_begin()));
+ Num += serialPatternMatch(*llvm::next(HeadMBB->succ_begin()));
Num += ifPatternMatch(HeadMBB);
assert(Num > 0);
@@ -1335,32 +1336,77 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// add initReg = initVal to headBlk
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- unsigned InitReg =
- HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
- if (!MigrateTrue || !MigrateFalse)
- llvm_unreachable("Extra register needed to handle CFG");
+ if (!MigrateTrue || !MigrateFalse) {
+ // XXX: We have an opportunity here to optimize the "branch into if" case
+ // here. Branch into if looks like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // The diamond_head block begins the "if" and the diamond_true block
+ // is the block being "branched into".
+ //
+ // If MigrateTrue is true, then TrueBB is the block being "branched into"
+ // and if MigrateFalse is true, then FalseBB is the block being
+ // "branched into"
+ //
+ // Here is the pseudo code for how I think the optimization should work:
+ // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
+ // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
+ // 3. Move the branch instruction from diamond_head into its own basic
+ // block (new_block).
+ // 4. Add an unconditional branch from diamond_head to new_block
+ // 5. Replace the branch instruction in branch_from with an unconditional
+ // branch to new_block. If branch_from has multiple predecessors, then
+ // we need to replace the True/False block in the branch
+ // instruction instead of replacing it.
+ // 6. Change the condition of the branch instruction in new_block from
+ // COND to (COND || GPR0)
+ //
+  // In order to insert these MOV instructions, we will need to use the
+ // RegisterScavenger. Usually liveness stops being tracked during
+ // the late machine optimization passes, however if we implement
+ // bool TargetRegisterInfo::requiresRegisterScavenging(
+ // const MachineFunction &MF)
+ // and have it return true, liveness will be tracked correctly
+ // by generic optimization passes. We will also need to make sure that
+ // all of our target-specific passes that run after regalloc and before
+ // the CFGStructurizer track liveness and we will need to modify this pass
+ // to correctly track liveness.
+ //
+ // After the above changes, the new CFG should look like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // \ /
+ // new_block
+ // / |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // Without this optimization, we are forced to duplicate the diamond_true
+ // block and we will end up with a CFG like this:
+ //
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true diamond_true (duplicate)
+ // \ / |
+ // done --------------------|
+ //
+ // Duplicating diamond_true can be very costly especially if it has a
+ // lot of instructions.
+ return 0;
+ }
int NumNewBlk = 0;
- if (!LandBlk) {
- LandBlk = HeadMBB->getParent()->CreateMachineBasicBlock();
- HeadMBB->getParent()->push_back(LandBlk); //insert to function
-
- if (TrueMBB) {
- TrueMBB->addSuccessor(LandBlk);
- } else {
- HeadMBB->addSuccessor(LandBlk);
- }
-
- if (FalseMBB) {
- FalseMBB->addSuccessor(LandBlk);
- } else {
- HeadMBB->addSuccessor(LandBlk);
- }
-
- NumNewBlk ++;
- }
-
bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
//insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
@@ -1375,6 +1421,10 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
CmpResReg, DebugLoc());
}
+ // XXX: We are running this after RA, so creating virtual registers will
+ // cause an assertion failure in the PostRA scheduling pass.
+ unsigned InitReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
DebugLoc());
@@ -1713,7 +1763,7 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
if (MBB->succ_size() != 2)
return;
MachineBasicBlock *MBB1 = *MBB->succ_begin();
- MachineBasicBlock *MBB2 = *(++MBB->succ_begin());
+ MachineBasicBlock *MBB2 = *llvm::next(MBB->succ_begin());
if (MBB1 != MBB2)
return;
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
index f7d0bd5..0f0c88d 100644
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -118,15 +118,15 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
// Multiclass Instruction formats
//===--------------------------------------------------------------------===//
// Multiclass that handles branch instructions
-multiclass BranchConditional<SDNode Op> {
+multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
def _i32 : ILFormat<(outs),
- (ins brtarget:$target, GPRI32:$src0),
+ (ins brtarget:$target, rci:$src0),
"; i32 Pseudo branch instruction",
- [(Op bb:$target, GPRI32:$src0)]>;
+ [(Op bb:$target, (i32 rci:$src0))]>;
def _f32 : ILFormat<(outs),
- (ins brtarget:$target, GPRF32:$src0),
+ (ins brtarget:$target, rcf:$src0),
"; f32 Pseudo branch instruction",
- [(Op bb:$target, GPRF32:$src0)]>;
+ [(Op bb:$target, (f32 rcf:$src0))]>;
}
// Only scalar types should generate flow control
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index fde187b..9f8f6a8 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen
AMDILISelLowering.cpp
AMDGPUAsmPrinter.cpp
AMDGPUFrameLowering.cpp
- AMDGPUIndirectAddressing.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp
@@ -28,6 +27,7 @@ add_llvm_target(R600CodeGen
AMDGPUConvertToISA.cpp
AMDGPUInstrInfo.cpp
AMDGPURegisterInfo.cpp
+ R600ClauseMergePass.cpp
R600ControlFlowFinalizer.cpp
R600EmitClauseMarkers.cpp
R600ExpandSpecialInstrs.cpp
@@ -47,6 +47,7 @@ add_llvm_target(R600CodeGen
SILowerControlFlow.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
+ SITypeRewriter.cpp
)
add_dependencies(LLVMR600CodeGen AMDGPUCommonTableGen intrinsics_gen)
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index fac3c39..99e1377 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -23,6 +23,63 @@ void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
printAnnotation(OS, Annot);
}
+void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
+ switch (reg) {
+ case AMDGPU::VCC:
+ O << "vcc";
+ return;
+ case AMDGPU::SCC:
+ O << "scc";
+ return;
+ case AMDGPU::EXEC:
+ O << "exec";
+ return;
+ case AMDGPU::M0:
+ O << "m0";
+ return;
+ default:
+ break;
+ }
+
+  // It seems there's no way to use SIRegisterInfo here, and dealing with the
+  // giant enum of all the different shifted sets of registers is pretty
+  // unmanageable, so parse the name and reformat it to be prettier.
+ StringRef Name(getRegisterName(reg));
+
+ std::pair<StringRef, StringRef> Split = Name.split('_');
+ StringRef SubRegName = Split.first;
+ StringRef Rest = Split.second;
+
+  if (SubRegName.size() <= 4) { // Must be longer than just "SGPR"/"VGPR".
+ O << Name;
+ return;
+ }
+
+ unsigned RegIndex;
+ StringRef RegIndexStr = SubRegName.drop_front(4);
+
+ if (RegIndexStr.getAsInteger(10, RegIndex)) {
+ O << Name;
+ return;
+ }
+
+ if (SubRegName.front() == 'V')
+ O << 'v';
+ else if (SubRegName.front() == 'S')
+ O << 's';
+ else {
+ O << Name;
+ return;
+ }
+
+ if (Rest.empty()) // Only 1 32-bit register
+ O << RegIndex;
+ else {
+ unsigned NumReg = Rest.count('_') + 2;
+ O << '[' << RegIndex << ':' << (RegIndex + NumReg - 1) << ']';
+ }
+}
+
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
@@ -30,8 +87,12 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
switch (Op.getReg()) {
// This is the default predicate state, so we don't need to print it.
- case AMDGPU::PRED_SEL_OFF: break;
- default: O << getRegisterName(Op.getReg()); break;
+ case AMDGPU::PRED_SEL_OFF:
+ break;
+
+ default:
+ printRegOperand(Op.getReg(), O);
+ break;
}
} else if (Op.isImm()) {
O << Op.getImm();
@@ -255,4 +316,21 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
}
}
+void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+  // Note: Mask values are taken from SIInsertWaits.cpp and not from the ISA
+  // docs. The bit usage in SIInsertWaits.cpp does not match the ISA docs'
+  // description, but it works, so it might be a misprint in the docs.
+ unsigned SImm16 = MI->getOperand(OpNo).getImm();
+ unsigned Vmcnt = SImm16 & 0xF;
+ unsigned Expcnt = (SImm16 >> 4) & 0xF;
+ unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
+ if (Vmcnt != 0xF)
+ O << "vmcnt(" << Vmcnt << ") ";
+ if (Expcnt != 0x7)
+ O << "expcnt(" << Expcnt << ") ";
+ if (Lgkmcnt != 0x7)
+ O << "lgkmcnt(" << Lgkmcnt << ")";
+}
+
#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 4c1dfa6..77af942 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -32,6 +32,7 @@ public:
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
private:
+ void printRegOperand(unsigned RegNo, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -52,6 +53,7 @@ private:
void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // End namespace llvm
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 9a36903..29d0acf 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -95,7 +95,9 @@ public:
} // end anonymous namespace
-MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new ELFAMDGPUAsmBackend(T);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 59136f3..4a8e1b0 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -21,7 +21,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = true;
MaxInstLength = 16;
- PCSymbol = "$";
SeparatorString = "\n";
CommentColumn = 40;
CommentString = ";";
@@ -32,9 +31,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
InlineAsmStart = ";#ASMSTART";
InlineAsmEnd = ";#ASMEND";
AssemblerDialect = 0;
- AllowQuotesInName = false;
- AllowNameToStartWithDigit = false;
- AllowPeriodsInName = false;
//===--- Data Emission Directives -------------------------------------===//
ZeroDirective = ".zero";
@@ -56,13 +52,11 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
//===--- Global Variable Emission Directives --------------------------===//
GlobalDirective = ".global";
- ExternDirective = ".extern";
HasSetDirective = false;
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
- HasSymbolResolver = false;
WeakRefDirective = ".weakref\t";
LinkOnceDirective = 0;
//===--- Dwarf Emission Directives -----------------------------------===//
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
new file mode 100644
index 0000000..521b3b3
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -0,0 +1,21 @@
+//===-- AMDGPUCodeEmitter.cpp - AMDGPU Code Emitter interface -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCCodeEmitter.h"
+
+using namespace llvm;
+
+// pin vtable to this file
+void AMDGPUMCCodeEmitter::anchor() {}
+
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
index cd3a7ce..d8cf64a 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -24,6 +24,7 @@ class MCInst;
class MCOperand;
class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+ virtual void anchor();
public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 61d70bb..a1bec28 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -88,7 +88,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCCodeEmitter *_Emitter,
bool RelaxAll,
bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false);
+ return createELFStreamer(Ctx, 0, MAB, _OS, _Emitter, false, false);
}
extern "C" void LLVMInitializeR600TargetMC() {
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index abb0320..f6b3376 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -40,8 +40,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
} // End llvm namespace
diff --git a/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
index 3ccdf42..98f6925 100644
--- a/lib/Target/R600/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
@@ -2,6 +2,7 @@
add_llvm_library(LLVMR600Desc
AMDGPUAsmBackend.cpp
AMDGPUELFObjectWriter.cpp
+ AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUMCAsmInfo.cpp
R600MCCodeEmitter.cpp
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index f470783..dd8df65 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -24,7 +24,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
-#include <stdio.h>
using namespace llvm;
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index 4631c04..ee190e4 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -48,6 +48,7 @@ def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>;
def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>;
def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>;
def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"bonaire", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"kabini", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"kaveri", SI_Itin, [FeatureSouthernIslands]>;
+def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>;
+def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>;
+def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>;
+def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>;
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
new file mode 100644
index 0000000..33d2ca3
--- /dev/null
+++ b/lib/Target/R600/R600ClauseMergePass.cpp
@@ -0,0 +1,204 @@
+//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The R600EmitClauseMarkers pass emits CFAlu instructions conservatively.
+/// This pass merges consecutive CFAlus where applicable.
+/// It needs to be called after IfCvt for best results.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "r600mergeclause"
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+static bool isCFAlu(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU:
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+class R600ClauseMergePass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ unsigned getCFAluSize(const MachineInstr *MI) const;
+ bool isCFAluEnabled(const MachineInstr *MI) const;
+
+  /// The IfCvt pass can generate "disabled" ALU clause markers that need to
+  /// be removed, with their contents folded into the previous ALU clause.
+  /// This function walks the instructions after CFAlu, merging any disabled
+  /// CFAlu it finds, and stops at the first enabled CFAlu.
+ void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
+
+  /// Check whether LatrCFAlu can be merged into RootCFAlu and do so if
+  /// possible.
+ bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
+ const;
+
+public:
+ R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const;
+};
+
+char R600ClauseMergePass::ID = 0;
+
+unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
+ assert(isCFAlu(MI));
+ return MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
+}
+
+bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
+ assert(isCFAlu(MI));
+ return MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
+}
+
+void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
+ const {
+ int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+ MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
+ I++;
+ do {
+    while (I != E && !isCFAlu(I))
+ I++;
+ if (I == E)
+ return;
+ MachineInstr *MI = I++;
+ if (isCFAluEnabled(MI))
+ break;
+ CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
+ MI->eraseFromParent();
+ } while (I != E);
+}
+
+bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
+ const MachineInstr *LatrCFAlu) const {
+ assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
+ int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+ unsigned RootInstCount = getCFAluSize(RootCFAlu),
+ LaterInstCount = getCFAluSize(LatrCFAlu);
+ unsigned CumuledInsts = RootInstCount + LaterInstCount;
+ if (CumuledInsts >= TII->getMaxAlusPerClause()) {
+ DEBUG(dbgs() << "Excess inst counts\n");
+ return false;
+ }
+ if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ return false;
+ // Is KCache Bank 0 compatible ?
+ int Mode0Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
+ int KBank0Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
+ int KBank0LineIdx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
+ if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
+ RootCFAlu->getOperand(Mode0Idx).getImm() &&
+ (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
+ RootCFAlu->getOperand(KBank0Idx).getImm() ||
+ LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
+ RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
+ DEBUG(dbgs() << "Wrong KC0\n");
+ return false;
+ }
+ // Is KCache Bank 1 compatible ?
+ int Mode1Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
+ int KBank1Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
+ int KBank1LineIdx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
+ if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
+ RootCFAlu->getOperand(Mode1Idx).getImm() &&
+ (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
+ RootCFAlu->getOperand(KBank1Idx).getImm() ||
+ LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
+ RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
+ DEBUG(dbgs() << "Wrong KC0\n");
+ return false;
+ }
+ if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
+ RootCFAlu->getOperand(Mode0Idx).setImm(
+ LatrCFAlu->getOperand(Mode0Idx).getImm());
+ RootCFAlu->getOperand(KBank0Idx).setImm(
+ LatrCFAlu->getOperand(KBank0Idx).getImm());
+ RootCFAlu->getOperand(KBank0LineIdx).setImm(
+ LatrCFAlu->getOperand(KBank0LineIdx).getImm());
+ }
+ if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
+ RootCFAlu->getOperand(Mode1Idx).setImm(
+ LatrCFAlu->getOperand(Mode1Idx).getImm());
+ RootCFAlu->getOperand(KBank1Idx).setImm(
+ LatrCFAlu->getOperand(KBank1Idx).getImm());
+ RootCFAlu->getOperand(KBank1LineIdx).setImm(
+ LatrCFAlu->getOperand(KBank1LineIdx).getImm());
+ }
+ RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
+ RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
+ return true;
+}
+
+bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator LatestCFAlu = E;
+ while (I != E) {
+ MachineInstr *MI = I++;
+ if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
+ TII->mustBeLastInClause(MI->getOpcode()))
+ LatestCFAlu = E;
+ if (!isCFAlu(MI))
+ continue;
+ cleanPotentialDisabledCFAlu(MI);
+
+ if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
+ MI->eraseFromParent();
+ } else {
+ assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
+ LatestCFAlu = MI;
+ }
+ }
+ }
+ return false;
+}
+
+const char *R600ClauseMergePass::getPassName() const {
+ return "R600 Merge Clause Markers Pass";
+}
+
+} // end anonymous namespace
+
+
+llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
+ return new R600ClauseMergePass(TM);
+}
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 715be37..ac3d8f6 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -373,15 +373,6 @@ public:
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
- case AMDGPU::EG_ExportBuf:
- case AMDGPU::EG_ExportSwz:
- case AMDGPU::R600_ExportBuf:
- case AMDGPU::R600_ExportSwz:
- case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
- case AMDGPU::RAT_STORE_DWORD32_cm:
- case AMDGPU::RAT_STORE_DWORD64_cm:
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
@@ -491,6 +482,10 @@ public:
EmitALUClause(I, AluClauses[i], CfCount);
}
default:
+ if (TII->isExport(MI->getOpcode())) {
+ DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ CfCount++;
+ }
break;
}
}
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
index 90fc29c..1781f2a 100644
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -44,7 +44,9 @@ namespace R600_InstFlag {
TEX_INST = (1 << 13),
ALU_INST = (1 << 14),
LDS_1A = (1 << 15),
- LDS_1A1D = (1 << 16)
+ LDS_1A1D = (1 << 16),
+ IS_EXPORT = (1 << 17),
+ LDS_1A2D = (1 << 18)
};
}
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
index fac2b47..1bbfd2b 100644
--- a/lib/Target/R600/R600EmitClauseMarkers.cpp
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -47,6 +47,11 @@ private:
break;
}
+ // These will be expanded to two ALU instructions in the
+ // ExpandSpecialInstructions pass.
+ if (TII->isLDSRetInstr(MI->getOpcode()))
+ return 2;
+
if(TII->isVector(*MI) ||
TII->isCubeOp(MI->getOpcode()) ||
TII->isReductionOp(MI->getOpcode()))
@@ -84,6 +89,7 @@ private:
switch (MI->getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
+ case AMDGPU::IMPLICIT_DEF:
return true;
default:
return false;
@@ -105,8 +111,13 @@ private:
}
bool SubstituteKCacheBank(MachineInstr *MI,
- std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
+ std::vector<std::pair<unsigned, unsigned> > &CachedConsts,
+ bool UpdateInstr = true) const {
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+
+ if (!TII->isALUInstr(MI->getOpcode()) && MI->getOpcode() != AMDGPU::DOT_4)
+ return true;
+
const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Consts =
TII->getSrcs(MI);
assert((TII->isALUInstr(MI->getOpcode()) ||
@@ -139,6 +150,9 @@ private:
return false;
}
+ if (!UpdateInstr)
+ return true;
+
for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
continue;
@@ -159,6 +173,52 @@ private:
return true;
}
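+ /// Check that every clause-local register defined by \p Def is killed
+ /// before we would be forced to close the current ALU clause, either by
+ /// KCache read limitations or by the per-clause instruction limit.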
+ bool canClauseLocalKillFitInClause(
+ unsigned AluInstCount,
+ std::vector<std::pair<unsigned, unsigned> > KCacheBanks,
+ MachineBasicBlock::iterator Def,
+ MachineBasicBlock::iterator BBEnd) {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ for (MachineInstr::const_mop_iterator
+ MOI = Def->operands_begin(),
+ MOE = Def->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef() ||
+ TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
+ continue;
+
+ // Def defines a clause local register, so check that its use will fit
+ // in the clause.
+ unsigned LastUseCount = 0;
+ for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
+ AluInstCount += OccupiedDwords(UseI);
+ // Make sure we won't need to end the clause due to KCache limitations.
+ if (!SubstituteKCacheBank(UseI, KCacheBanks, false))
+ return false;
+
+ // We have reached the maximum instruction limit before finding the
+ // use that kills this register, so we cannot use this def in the
+ // current clause.
+ if (AluInstCount >= TII->getMaxAlusPerClause())
+ return false;
+
+ // Register kill flags have been cleared by the time we get to this
+ // pass, but it is safe to assume that all uses of this register
+ // occur in the same basic block as its definition, because
+ // it is illegal for the scheduler to schedule them in
+ // different blocks.
+ if (UseI->findRegisterUseOperandIdx(MOI->getReg()))
+ LastUseCount = AluInstCount;
+
+ if (UseI != Def && UseI->findRegisterDefOperandIdx(MOI->getReg()) != -1)
+ break;
+ }
+ if (LastUseCount)
+ return LastUseCount <= TII->getMaxAlusPerClause();
+ llvm_unreachable("Clause local register live at end of clause.");
+ }
+ return true;
+ }
+
MachineBasicBlock::iterator
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator ClauseHead = I;
@@ -173,6 +233,14 @@ private:
if (AluInstCount > TII->getMaxAlusPerClause())
break;
if (I->getOpcode() == AMDGPU::PRED_X) {
+ // We put PRED_X in its own clause to ensure that ifcvt won't create
+ // clauses with more than 128 insts.
+ // IfCvt only converts branches whose "then" and "else" blocks each contain
+ // fewer than ~60 insts, so converted clauses cannot grow beyond ~121 insts
+ // (the predicate setter has to live in the same clause as the predicated
+ // ALUs).
+ if (AluInstCount > 0)
+ break;
if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
PushBeforeModifier = true;
AluInstCount ++;
@@ -189,11 +257,13 @@ private:
I++;
break;
}
- if (TII->isALUInstr(I->getOpcode()) &&
- !SubstituteKCacheBank(I, KCacheBanks))
+
+ // If this instruction defines a clause local register, make sure
+ // its use can fit in this clause.
+ if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
break;
- if (I->getOpcode() == AMDGPU::DOT_4 &&
- !SubstituteKCacheBank(I, KCacheBanks))
+
+ if (!SubstituteKCacheBank(I, KCacheBanks))
break;
AluInstCount += OccupiedDwords(I);
}
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index 67b42d7..aeee4aa 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -68,6 +68,23 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
I = llvm::next(I);
+ // Expand LDS_*_RET instructions
+ if (TII->isLDSRetInstr(MI.getOpcode())) {
+ int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ MachineOperand &DstOp = MI.getOperand(DstIdx);
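+      // LDS reads return their result through OQAP, so copy OQAP into the
+      // original destination and retarget the LDS instruction to define OQAP.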
+ MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
+ DstOp.getReg(), AMDGPU::OQAP);
+ DstOp.setReg(AMDGPU::OQAP);
+ int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::pred_sel);
+ int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
+ AMDGPU::OpName::pred_sel);
+ // Copy the pred_sel bit
+ Mov->getOperand(MovPredSelIdx).setReg(
+ MI.getOperand(LDSPredSelIdx).getReg());
+ }
+
switch (MI.getOpcode()) {
default: break;
// Expand PRED_X to one of the PRED_SET instructions.
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index ce6ac89..0fcb488 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -38,14 +38,24 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
computeRegisterProperties();
- setOperationAction(ISD::FADD, MVT::v4f32, Expand);
- setOperationAction(ISD::FADD, MVT::v2f32, Expand);
- setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
- setOperationAction(ISD::FMUL, MVT::v2f32, Expand);
- setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
- setOperationAction(ISD::FDIV, MVT::v2f32, Expand);
- setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
- setOperationAction(ISD::FSUB, MVT::v2f32, Expand);
+ // Set condition code actions
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
+
+ setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Custom);
@@ -69,21 +79,33 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
- setOperationAction(ISD::SELECT, MVT::i32, Custom);
- setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+
+ // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
+ // spaces, so it is custom lowered to handle those where it isn't.
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
+
setOperationAction(ISD::STORE, MVT::i8, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setTruncStoreAction(MVT::i32, MVT::i8, Custom);
+ setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -99,7 +121,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- setSchedulingPreference(Sched::VLIW);
+ setSchedulingPreference(Sched::Source);
}
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
@@ -111,7 +133,25 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
switch (MI->getOpcode()) {
- default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ default:
+  // Replace LDS_*_RET instructions that don't have any uses with the
+ // equivalent LDS_*_NORET instruction.
+ if (TII->isLDSRetInstr(MI->getOpcode())) {
+ int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ MachineInstrBuilder NewMI;
+ if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
+ return BB;
+
+ NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
+ TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
+ NewMI.addOperand(MI->getOperand(i));
+ }
+ } else {
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ }
+ break;
case AMDGPU::CLAMP_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
AMDGPU::MOV,
@@ -147,19 +187,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
break;
}
- case AMDGPU::LDS_READ_RET: {
- MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
- TII->get(MI->getOpcode()),
- AMDGPU::OQAP);
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
- NewMI.addOperand(MI->getOperand(i));
- }
- TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
- MI->getOperand(0).getReg(),
- AMDGPU::OQAP);
- break;
- }
-
case AMDGPU::MOV_IMM_F32:
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
MI->getOperand(1).getFPImm()->getValueAPF()
@@ -486,10 +513,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::FCOS:
case ISD::FSIN: return LowerTrig(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@@ -554,7 +579,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
DL, MVT::f32, SDValue(interp, 0));
}
-
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
@@ -576,6 +600,24 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
RegisterJNode, RegisterINode);
return SDValue(interp, slot % 2);
}
+ case AMDGPUIntrinsic::R600_interp_xy:
+ case AMDGPUIntrinsic::R600_interp_zw: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ MachineSDNode *interp;
+ SDValue RegisterINode = Op.getOperand(2);
+ SDValue RegisterJNode = Op.getOperand(3);
+
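+    // INTERP_PAIR_XY / INTERP_PAIR_ZW produce two f32 results; pack them back
+    // into a single v2f32 value for the intrinsic.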
+ if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
+ RegisterJNode, RegisterINode);
+ else
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
+ RegisterJNode, RegisterINode);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
+ SDValue(interp, 0), SDValue(interp, 1));
+ }
case AMDGPUIntrinsic::R600_tex:
case AMDGPUIntrinsic::R600_texc:
case AMDGPUIntrinsic::R600_txl:
@@ -585,7 +627,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case AMDGPUIntrinsic::R600_txf:
case AMDGPUIntrinsic::R600_txq:
case AMDGPUIntrinsic::R600_ddx:
- case AMDGPUIntrinsic::R600_ddy: {
+ case AMDGPUIntrinsic::R600_ddy:
+ case AMDGPUIntrinsic::R600_ldptr: {
unsigned TextureOp;
switch (IntrinsicID) {
case AMDGPUIntrinsic::R600_tex:
@@ -618,6 +661,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case AMDGPUIntrinsic::R600_ddy:
TextureOp = 9;
break;
+ case AMDGPUIntrinsic::R600_ldptr:
+ TextureOp = 10;
+ break;
default:
llvm_unreachable("Unknow Texture Operation");
}
@@ -792,20 +838,6 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
false, false, false, 0);
}
-SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
-
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
- assert(FIN);
-
- unsigned FrameIndex = FIN->getIndex();
- unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
- return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
-}
-
bool R600TargetLowering::isZero(SDValue Op) const {
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
return Cst->isNullValue();
@@ -836,16 +868,27 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
//
// SET* can match the following patterns:
//
- // select_cc f32, f32, -1, 0, cc_any
- // select_cc f32, f32, 1.0f, 0.0f, cc_any
- // select_cc i32, i32, -1, 0, cc_any
+ // select_cc f32, f32, -1, 0, cc_supported
+ // select_cc f32, f32, 1.0f, 0.0f, cc_supported
+ // select_cc i32, i32, -1, 0, cc_supported
//
// Move hardware True/False values to the correct operand.
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ ISD::CondCode InverseCC =
+ ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
if (isHWTrueValue(False) && isHWFalseValue(True)) {
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- std::swap(False, True);
- CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+ if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
+ std::swap(False, True);
+ CC = DAG.getCondCode(InverseCC);
+ } else {
+ ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
+ if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
+ std::swap(False, True);
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(SwapInvCC);
+ }
+ }
}
if (isHWTrueValue(True) && isHWFalseValue(False) &&
@@ -858,14 +901,34 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
//
// CND* can match the following patterns:
//
- // select_cc f32, 0.0, f32, f32, cc_any
- // select_cc f32, 0.0, i32, i32, cc_any
- // select_cc i32, 0, f32, f32, cc_any
- // select_cc i32, 0, i32, i32, cc_any
+ // select_cc f32, 0.0, f32, f32, cc_supported
+ // select_cc f32, 0.0, i32, i32, cc_supported
+ // select_cc i32, 0, f32, f32, cc_supported
+ // select_cc i32, 0, i32, i32, cc_supported
//
- if (isZero(LHS) || isZero(RHS)) {
- SDValue Cond = (isZero(LHS) ? RHS : LHS);
- SDValue Zero = (isZero(LHS) ? LHS : RHS);
+
+ // Try to move the zero value to the RHS
+ if (isZero(LHS)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ // Try swapping the operands
+ ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
+ if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(CCSwapped);
+ } else {
+      // Try inverting the condition and then swapping the operands
+ ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
+ CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
+ if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
+ std::swap(True, False);
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(CCSwapped);
+ }
+ }
+ }
+ if (isZero(RHS)) {
+ SDValue Cond = LHS;
+ SDValue Zero = RHS;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
if (CompareVT != VT) {
// Bitcast True / False to the correct types. This will end up being
@@ -875,20 +938,11 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
}
- if (isZero(LHS)) {
- CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
- }
switch (CCOpcode) {
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
- case ISD::SETULE:
- case ISD::SETULT:
- case ISD::SETOLE:
- case ISD::SETOLT:
- case ISD::SETLE:
- case ISD::SETLT:
CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Temp = True;
True = False;
@@ -936,17 +990,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
DAG.getCondCode(ISD::SETNE));
}
-SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
- return DAG.getNode(ISD::SELECT_CC,
- SDLoc(Op),
- Op.getValueType(),
- Op.getOperand(0),
- DAG.getConstant(0, MVT::i32),
- Op.getOperand(1),
- Op.getOperand(2),
- DAG.getCondCode(ISD::SETNE));
-}
-
/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
@@ -1009,19 +1052,59 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Value = Op.getOperand(1);
SDValue Ptr = Op.getOperand(2);
- if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
- Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
- // Convert pointer from byte address to dword address.
- Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
- DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
- Ptr, DAG.getConstant(2, MVT::i32)));
+ SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
- if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
- assert(!"Truncated and indexed stores not supported yet");
- } else {
- Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
+ if (StoreNode->isTruncatingStore()) {
+ EVT VT = Value.getValueType();
+ assert(VT.bitsLE(MVT::i32));
+ EVT MemVT = StoreNode->getMemoryVT();
+ SDValue MaskConstant;
+ if (MemVT == MVT::i8) {
+ MaskConstant = DAG.getConstant(0xFF, MVT::i32);
+ } else {
+ assert(MemVT == MVT::i16);
+ MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
+ }
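+      // Split the byte address into a dword address plus a byte offset, then
+      // shift the truncated value and the mask into position so the store can
+      // be emitted as a masked-or (STORE_MSKOR) memory intrinsic.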
+ SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(0x00000003, VT));
+ SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
+ DAG.getConstant(3, VT));
+ SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
+ SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
+ // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
+ // vector instead.
+ SDValue Src[4] = {
+ ShiftedValue,
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ Mask
+ };
+ SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
+ SDValue Args[3] = { Chain, Input, DWordAddr };
+ return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
+ Op->getVTList(), Args, 3, MemVT,
+ StoreNode->getMemOperand());
+ } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
+ Value.getValueType().bitsGE(MVT::i32)) {
+ // Convert pointer from byte address to dword address.
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(2, MVT::i32)));
+
+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
+ assert(!"Truncated and indexed stores not supported yet");
+ } else {
+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ }
+ return Chain;
}
- return Chain;
}
EVT ValueVT = Value.getValueType();
@@ -1121,12 +1204,22 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Ptr = Op.getOperand(1);
SDValue LoweredLoad;
+ if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
+ SDValue MergedValues[2] = {
+ SplitVectorLoad(Op, DAG),
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
- if (ConstantBlock > -1) {
+ if (ConstantBlock > -1 &&
+ ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
+ (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
SDValue Result;
- if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
- dyn_cast<Constant>(LoadNode->getSrcValue()) ||
- dyn_cast<ConstantSDNode>(Ptr)) {
+ if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
+ isa<Constant>(LoadNode->getSrcValue()) ||
+ isa<ConstantSDNode>(Ptr)) {
SDValue Slots[4];
for (unsigned i = 0; i < 4; i++) {
// We want Const position encoded with the following formula :
@@ -1166,13 +1259,13 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(MergedValues, 2, DL);
}
- // For most operations returning SDValue() will result int he node being
- // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
- // we need to manually expand loads that may be legal in some address spaces
- // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
- // for compute shaders, since the data is sign extended when it is uploaded
- // to the buffer. Howerver SEXT loads from other addresspaces are not
- // supported, so we need to expand them here.
+ // For most operations returning SDValue() will result in the node being
+ // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
+ // need to manually expand loads that may be legal in some address spaces and
+ // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
+ // compute shaders, since the data is sign extended when it is uploaded to the
+ // buffer. However SEXT loads from other address spaces are not supported, so
+ // we need to expand them here.
if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
EVT MemVT = LoadNode->getMemoryVT();
assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
@@ -1252,23 +1345,39 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
+
+ SmallVector<ISD::InputArg, 8> LocalIns;
+
+ getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
+ LocalIns);
- AnalyzeFormalArguments(CCInfo, Ins);
+ AnalyzeFormalArguments(CCInfo, LocalIns);
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT VT = VA.getLocVT();
+ EVT VT = Ins[i].VT;
+ EVT MemVT = LocalIns[i].VT;
+
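+    // Graphics shaders receive their inputs pre-loaded in registers; only
+    // compute kernels read their arguments from the constant buffer below.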
+ if (ShaderType != ShaderType::COMPUTE) {
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
+ SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ InVals.push_back(Register);
+ continue;
+ }
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_BUFFER_0);
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
- SDValue Arg = DAG.getLoad(VT, DL, Chain,
- DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
- MachinePointerInfo(UndefValue::get(PtrTy)), false,
- false, false, 4); // 4 is the prefered alignment for
- // the CONSTANT memory space.
+ SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
+ DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
+ MachinePointerInfo(UndefValue::get(PtrTy)),
+ MemVT, false, false, 4);
+    // 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;
@@ -1292,6 +1401,11 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
};
for (unsigned i = 0; i < 4; i++) {
+ if (NewBldVec[i].getOpcode() == ISD::UNDEF)
+      // We mask the write here to teach later passes that the ith element of
+      // this vector is undef. Thus we can use it to reduce 128-bit register
+      // usage, break false dependencies and additionally make the assembly
+      // easier to read.
+ RemapSwizzle[i] = 7; // SEL_MASK_WRITE
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
if (C->isZero()) {
RemapSwizzle[i] = 4; // SEL_0
@@ -1335,12 +1449,16 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
->getZExtValue();
- if (!isUnmovable[Idx]) {
- // Swap i and Idx
- std::swap(NewBldVec[Idx], NewBldVec[i]);
- std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
+ if (i == Idx) {
+ isUnmovable[Idx] = true;
+ continue;
}
- isUnmovable[Idx] = true;
+ if (isUnmovable[Idx])
+ continue;
+ // Swap i and Idx
+ std::swap(NewBldVec[Idx], NewBldVec[i]);
+ std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
+ break;
}
}
@@ -1422,8 +1540,8 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
break;
}
- // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
- // => build_vector elt0, …, NewEltIdx, …, eltN
+ // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
+ // => build_vector elt0, ... , NewEltIdx, ... , eltN
case ISD::INSERT_VECTOR_ELT: {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
@@ -1525,15 +1643,20 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
LHSCC = ISD::getSetCCInverse(LHSCC,
LHS.getOperand(0).getValueType().isInteger());
- return DAG.getSelectCC(SDLoc(N),
- LHS.getOperand(0),
- LHS.getOperand(1),
- LHS.getOperand(2),
- LHS.getOperand(3),
- LHSCC);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
+ return DAG.getSelectCC(SDLoc(N),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ LHSCC);
+ break;
}
}
+ return SDValue();
}
+
case AMDGPUISD::EXPORT: {
SDValue Arg = N->getOperand(1);
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
@@ -1586,3 +1709,253 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
return SDValue();
}
+
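+/// Try to fold the node feeding operand \p SrcIdx of \p ParentNode into the
+/// operand fields themselves: FNEG/FABS become the neg/abs modifier bits,
+/// CONST_COPY becomes an ALU_CONST read with the matching sel value, and
+/// MOV_IMM_* becomes an inline constant register or an ALU_LITERAL_X
+/// immediate.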
+static bool
+FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
+ SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+ if (!Src.isMachineOpcode())
+ return false;
+ switch (Src.getMachineOpcode()) {
+ case AMDGPU::FNEG_R600:
+ if (!Neg.getNode())
+ return false;
+ Src = Src.getOperand(0);
+ Neg = DAG.getTargetConstant(1, MVT::i32);
+ return true;
+ case AMDGPU::FABS_R600:
+ if (!Abs.getNode())
+ return false;
+ Src = Src.getOperand(0);
+ Abs = DAG.getTargetConstant(1, MVT::i32);
+ return true;
+ case AMDGPU::CONST_COPY: {
+ unsigned Opcode = ParentNode->getMachineOpcode();
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+
+ if (!Sel.getNode())
+ return false;
+
+ SDValue CstOffset = Src.getOperand(0);
+ if (ParentNode->getValueType(0).isVector())
+ return false;
+
+    // Gather constant values
+ int SrcIndices[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+ };
+ std::vector<unsigned> Consts;
+ for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
+ int OtherSrcIdx = SrcIndices[i];
+ int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
+ if (OtherSrcIdx < 0 || OtherSelIdx < 0)
+ continue;
+ if (HasDst) {
+ OtherSrcIdx--;
+ OtherSelIdx--;
+ }
+ if (RegisterSDNode *Reg =
+ dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
+ ParentNode->getOperand(OtherSelIdx));
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
+ }
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts)) {
+ return false;
+ }
+
+ Sel = CstOffset;
+ Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ return true;
+ }
+ case AMDGPU::MOV_IMM_I32:
+ case AMDGPU::MOV_IMM_F32: {
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+ uint64_t ImmValue = 0;
+
+
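+    // Prefer the hardware inline constants (ZERO, HALF, ONE, ONE_INT) when
+    // the immediate matches one of them; otherwise fall back to an
+    // ALU_LITERAL_X literal.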
+ if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
+ ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
+ float FloatValue = FPC->getValueAPF().convertToFloat();
+ if (FloatValue == 0.0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (FloatValue == 0.5) {
+ ImmReg = AMDGPU::HALF;
+ } else if (FloatValue == 1.0) {
+ ImmReg = AMDGPU::ONE;
+ } else {
+ ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+ }
+ } else {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
+ uint64_t Value = C->getZExtValue();
+ if (Value == 0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (Value == 1) {
+ ImmReg = AMDGPU::ONE_INT;
+ } else {
+ ImmValue = Value;
+ }
+ }
+
+ // Check that we aren't already using an immediate.
+ // XXX: It's possible for an instruction to have more than one
+ // immediate operand, but this is not supported yet.
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ if (!Imm.getNode())
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
+ assert(C);
+ if (C->getZExtValue())
+ return false;
+ Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
+ }
+ Src = DAG.getRegister(ImmReg, MVT::i32);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+
+/// \brief Fold the instructions after selecting them
+SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+ if (!Node->isMachineOpcode())
+ return Node;
+ unsigned Opcode = Node->getMachineOpcode();
+ SDValue FakeOp;
+
+ std::vector<SDValue> Ops;
+ for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
+ I != E; ++I)
+ Ops.push_back(*I);
+
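+  // DOT_4 carries a full set of per-channel sources (src0_X .. src1_W), so
+  // try to fold modifiers and constants into each channel independently.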
+ if (Opcode == AMDGPU::DOT_4) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
+ };
+ for (unsigned i = 0; i < 8; i++) {
+ if (OperandIdx[i] < 0)
+ return Node;
+ SDValue &Src = Ops[OperandIdx[i] - 1];
+ SDValue &Neg = Ops[NegIdx[i] - 1];
+ SDValue &Abs = Ops[AbsIdx[i] - 1];
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
+ if (HasDst)
+ SelIdx--;
+ SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
+ if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ } else if (Opcode == AMDGPU::REG_SEQUENCE) {
+ for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
+ SDValue &Src = Ops[i];
+ if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ } else if (Opcode == AMDGPU::CLAMP_R600) {
+ SDValue Src = Node->getOperand(0);
+ if (!Src.isMachineOpcode() ||
+ !TII->hasInstrModifiers(Src.getMachineOpcode()))
+ return Node;
+ int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
+ AMDGPU::OpName::clamp);
+ if (ClampIdx < 0)
+ return Node;
+ std::vector<SDValue> Ops;
+ unsigned NumOp = Src.getNumOperands();
+ for(unsigned i = 0; i < NumOp; ++i)
+ Ops.push_back(Src.getOperand(i));
+ Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
+ return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
+ Node->getVTList(), Ops);
+ } else {
+ if (!TII->hasInstrModifiers(Opcode))
+ return Node;
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
+ -1
+ };
+ for (unsigned i = 0; i < 3; i++) {
+ if (OperandIdx[i] < 0)
+ return Node;
+ SDValue &Src = Ops[OperandIdx[i] - 1];
+ SDValue &Neg = Ops[NegIdx[i] - 1];
+ SDValue FakeAbs;
+ SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
+ int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
+ if (HasDst) {
+ SelIdx--;
+ ImmIdx--;
+ }
+ SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
+ SDValue &Imm = Ops[ImmIdx];
+ if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ }
+
+ return Node;
+}
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index a033fcb..c10257e 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -56,11 +56,9 @@ private:
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
@@ -68,6 +66,7 @@ private:
void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
unsigned &Channel, unsigned &PtrIncr) const;
bool isZero(SDValue Op) const;
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
};
} // End namespace llvm;
diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td
index 2d72404..9428bab 100644
--- a/lib/Target/R600/R600InstrFormats.td
+++ b/lib/Target/R600/R600InstrFormats.td
@@ -16,7 +16,6 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
: AMDGPUInst <outs, ins, asm, pattern> {
field bits<64> Inst;
- bit TransOnly = 0;
bit Trig = 0;
bit Op3 = 0;
bit isVector = 0;
@@ -29,6 +28,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
bit VTXInst = 0;
bit TEXInst = 0;
bit ALUInst = 0;
+ bit IsExport = 0;
+ bit LDS_1A2D = 0;
let Namespace = "AMDGPU";
let OutOperandList = outs;
@@ -37,7 +38,6 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
let Pattern = pattern;
let Itinerary = itin;
- let TSFlags{0} = TransOnly;
let TSFlags{4} = Trig;
let TSFlags{5} = Op3;
@@ -53,6 +53,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
let TSFlags{14} = ALUInst;
let TSFlags{15} = LDS_1A;
let TSFlags{16} = LDS_1A1D;
+ let TSFlags{17} = IsExport;
+ let TSFlags{18} = LDS_1A2D;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 4e7eff9..c0827fc 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
using namespace llvm;
@@ -77,16 +77,16 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
- unsigned DstReg, int64_t Imm) const {
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
- MachineInstrBuilder MIB(*MF, MI);
- MIB.addReg(DstReg, RegState::Define);
- MIB.addReg(AMDGPU::ALU_LITERAL_X);
- MIB.addImm(Imm);
- MIB.addReg(0); // PREDICATE_BIT
-
- return MI;
+/// \returns true if \p MBBI can be moved into a new basic block.
+bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
+ E = MBBI->operands_end(); I != E; ++I) {
+ if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
+ I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
+ return false;
+ }
+ return true;
}
unsigned R600InstrInfo::getIEQOpcode() const {
@@ -149,17 +149,58 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
unsigned TargetFlags = get(Opcode).TSFlags;
return ((TargetFlags & R600_InstFlag::LDS_1A) |
- (TargetFlags & R600_InstFlag::LDS_1A1D));
+ (TargetFlags & R600_InstFlag::LDS_1A1D) |
+ (TargetFlags & R600_InstFlag::LDS_1A2D));
+}
+
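+// LDS instructions come in RET and NORET variants; the RET forms are exactly
+// the ones that carry an explicit dst operand.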
+bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
+}
+
+bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
+}
+
+bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
+ if (isALUInstr(MI->getOpcode()))
+ return true;
+ if (isVector(*MI) || isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT_4:
+ return true;
+ default:
+ return false;
+ }
}
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
- return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
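+  // Cayman has no separate trans unit, so no instruction is trans-only there.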
+ if (ST.hasCaymanISA())
+ return false;
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}
bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
return isTransOnly(MI->getOpcode());
}
+bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
+}
+
+bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
+ return isVectorOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::isExport(unsigned Opcode) const {
+ return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
+}
+
bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
return ST.hasVertexCache() && IS_VTX(get(Opcode));
}
@@ -189,6 +230,30 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
}
}
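+// AR_X is the address register used for indirect (relative) register
+// addressing on R600.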
+bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+}
+
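+/// \returns true if any source operand of \p MI is a physical register in the
+/// R600_LDS_SRC_REG register class.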
+bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
+ if (!isALUInstr(MI->getOpcode())) {
+ return false;
+ }
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (!I->isReg() || !I->isUse() ||
+ TargetRegisterInfo::isVirtualRegister(I->getReg()))
+ continue;
+
+ if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
+ return true;
+ }
+ return false;
+}
+
int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
static const unsigned OpTable[] = {
AMDGPU::OpName::src0,
@@ -321,6 +386,8 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI,
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
R600InstrInfo::BankSwizzle Swz) {
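+  // Reading the same source twice only needs a single read port, so ignore
+  // the duplicate when checking swizzle legality.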
+ if (Src[0] == Src[1])
+ Src[1].first = -1;
switch (Swz) {
case R600InstrInfo::ALU_VEC_012_SCL_210:
break;
@@ -462,6 +529,9 @@ static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
const std::vector<std::pair<int, unsigned> > &TransOps,
unsigned ConstCount) {
+ // TransALU can't read 3 constants
+ if (ConstCount > 2)
+ return false;
for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
const std::pair<int, unsigned> &Src = TransOps[i];
unsigned Cycle = getTransSwizzle(TransSwz, i);
@@ -615,6 +685,11 @@ bool isJump(unsigned Opcode) {
return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}
+static bool isBranch(unsigned Opcode) {
+ return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
+ Opcode == AMDGPU::BRANCH_COND_f32;
+}
+
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@@ -633,6 +708,10 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
+ // AMDGPU::BRANCH* instructions are only available after isel and are not
+ // handled
+ if (isBranch(I->getOpcode()))
+ return true;
if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
return false;
}
@@ -942,6 +1021,20 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
return true;
}
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
+ .setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
@@ -953,6 +1046,10 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
return false;
}
+unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
+ return 2;
+}
+
unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -961,67 +1058,25 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return 2;
}
-int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = 0;
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- if (MRI.livein_empty()) {
- return 0;
- }
-
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
- LE = MRI.livein_end();
- LI != LE; ++LI) {
- Offset = std::max(Offset,
- GET_REG_INDEX(RI.getEncodingValue(LI->first)));
- }
-
- return Offset + 1;
-}
-
-int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- int Offset = 0;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Variable sized objects are not supported
- assert(!MFI->hasVarSizedObjects());
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
-
- return getIndirectIndexBegin(MF) + Offset;
-}
-
-std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
- std::vector<unsigned> Regs;
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
- if (End == -1) {
- return Regs;
- }
+ if (End == -1)
+ return;
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
- Regs.push_back(SuperReg);
+ Reserved.set(SuperReg);
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
- Regs.push_back(Reg);
+ Reserved.set(Reg);
}
}
- return Regs;
}
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
@@ -1031,13 +1086,8 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
return RegIndex;
}
-const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
- unsigned SourceReg) const {
- return &AMDGPU::R600_TReg32RegClass;
-}
-
-const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
- return &AMDGPU::TRegMemRegClass;
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
+ return &AMDGPU::R600_TReg32_XRegClass;
}
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@@ -1076,10 +1126,6 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
return Mov;
}
-const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
- return &AMDGPU::IndirectRegRegClass;
-}
-
unsigned R600InstrInfo::getMaxAlusPerClause() const {
return 115;
}
@@ -1197,6 +1243,11 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
AMDGPU::OpName::src1_sel,
};
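+  // Propagate the per-slot pred_sel operand of the vector instruction into
+  // the newly built slot instruction.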
+ MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
+ MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
+ .setReg(MO.getReg());
+
for (unsigned i = 0; i < 14; i++) {
MachineOperand &MO = MI->getOperand(
getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
@@ -1217,6 +1268,12 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
return MovImm;
}
+MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const {
+ return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
+}
+
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
return getOperandIdx(MI.getOpcode(), Op);
}
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index cdaa2fb..13d9810 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -55,6 +55,8 @@ namespace llvm {
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
bool isTrig(const MachineInstr &MI) const;
bool isPlaceHolderOpcode(unsigned opcode) const;
@@ -65,9 +67,18 @@ namespace llvm {
bool isALUInstr(unsigned Opcode) const;
bool hasInstrModifiers(unsigned Opcode) const;
bool isLDSInstr(unsigned Opcode) const;
+ bool isLDSNoRetInstr(unsigned Opcode) const;
+ bool isLDSRetInstr(unsigned Opcode) const;
+
+  /// \returns true if \p MI is an ALU instruction or an instruction that
+  /// will be lowered by the ExpandSpecialInstrs pass.
+ bool canBeConsideredALU(const MachineInstr *MI) const;
bool isTransOnly(unsigned Opcode) const;
bool isTransOnly(const MachineInstr *MI) const;
+ bool isVectorOnly(unsigned Opcode) const;
+ bool isVectorOnly(const MachineInstr *MI) const;
+ bool isExport(unsigned Opcode) const;
bool usesVertexCache(unsigned Opcode) const;
bool usesVertexCache(const MachineInstr *MI) const;
@@ -75,6 +86,9 @@ namespace llvm {
bool usesTextureCache(const MachineInstr *MI) const;
bool mustBeLastInClause(unsigned Opcode) const;
+ bool usesAddressRegister(MachineInstr *MI) const;
+ bool definesAddressRegister(MachineInstr *MI) const;
+ bool readsLDSSrcReg(const MachineInstr *MI) const;
/// \returns The operand index for the given source number. Legal values
/// for SrcNum are 0, 1, and 2.
@@ -128,9 +142,6 @@ namespace llvm {
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
- virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const;
-
virtual unsigned getIEQOpcode() const;
virtual bool isMov(unsigned Opcode) const;
@@ -177,6 +188,8 @@ namespace llvm {
bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
+ unsigned int getPredicationCost(const MachineInstr *) const;
+
unsigned int getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost = 0) const;
@@ -184,22 +197,14 @@ namespace llvm {
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const { return 1;}
- /// \returns a list of all the registers that may be accesed using indirect
- /// addressing.
- std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
-
+  /// \brief Reserve the registers that may be accessed using indirect addressing.
+ void reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const;
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const;
- virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
- unsigned SourceReg) const;
-
- virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+ virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
@@ -211,8 +216,6 @@ namespace llvm {
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const;
- virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
-
unsigned getMaxAlusPerClause() const;
///buildDefaultInstruction - This function returns a MachineInstr with
@@ -239,6 +242,10 @@ namespace llvm {
unsigned DstReg,
uint64_t Imm) const;
+ MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const;
+
/// \brief Get the index of Op in the MachineInstr.
///
/// \returns -1 if the Instruction does not contain the specified \p Op.
@@ -272,6 +279,12 @@ namespace llvm {
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
};
+namespace AMDGPU {
+
+int getLDSNoRetOp(uint16_t Opcode);
+
+} //End namespace AMDGPU
+
} // End llvm namespace
#endif // R600INSTRINFO_H_
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 7e61b18..0346e24 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -75,7 +75,6 @@ def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
-def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
@@ -230,7 +229,7 @@ def TEX_RECT : PatLeaf<
def TEX_ARRAY : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 9 || TType == 10 || TType == 15 || TType == 16;
+ return TType == 9 || TType == 10 || TType == 16;
}]
>;
@@ -241,12 +240,26 @@ def TEX_SHADOW_ARRAY : PatLeaf<
}]
>;
-class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs,
- dag ins, string asm, list<dag> pattern> :
+def TEX_MSAA : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 14;
+ }]
+>;
+
+def TEX_ARRAY_MSAA : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 15;
+ }]
+>;
+
+class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
+ dag outs, dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern>,
CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {
- let rat_id = 0;
+ let rat_id = ratid;
let rat_inst = ratinst;
let rim = 0;
// XXX: Have a separate instruction for non-indexed writes.
@@ -264,6 +277,7 @@ class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs,
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
+ let IsExport = 1;
}
@@ -403,7 +417,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins i32imm:$src0),
"INTERP_LOAD $src0 : $dst",
- []>;
+ [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
@@ -537,6 +551,7 @@ class ExportSwzInst : InstR600ISA<(
let elem_size = 3;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
+ let IsExport = 1;
}
} // End usesCustomInserter = 1
@@ -550,6 +565,7 @@ class ExportBufInst : InstR600ISA<(
let elem_size = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
+ let IsExport = 1;
}
//===----------------------------------------------------------------------===//
@@ -573,6 +589,7 @@ i32imm:$COUNT, i32imm:$Enabled),
let ALT_CONST = 0;
let WHOLE_QUAD_MODE = 0;
let BARRIER = 1;
+ let UseNamedOperandTable = 1;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
@@ -672,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
def SETE : R600_2OP <
0x08, "SETE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
>;
def SGT : R600_2OP <
0x09, "SETGT",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
>;
def SGE : R600_2OP <
0xA, "SETGE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
>;
def SNE : R600_2OP <
0xB, "SETNE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
>;
def SETE_DX10 : R600_2OP <
0xC, "SETE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
>;
def SETGT_DX10 : R600_2OP <
0xD, "SETGT_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
>;
def SETGE_DX10 : R600_2OP <
0xE, "SETGE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
>;
def SETNE_DX10 : R600_2OP <
0xF, "SETNE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
>;
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
@@ -805,12 +822,12 @@ def CNDE_INT : R600_3OP <
def CNDGE_INT : R600_3OP <
0x1E, "CNDGE_INT",
- [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))]
>;
def CNDGT_INT : R600_3OP <
0x1D, "CNDGT_INT",
- [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))]
>;
//===----------------------------------------------------------------------===//
@@ -863,6 +880,9 @@ def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
def TEX_LD : R600_TEX <0x03, "TEX_LD">;
+def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> {
+ let INST_MOD = 1;
+}
def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
@@ -881,6 +901,7 @@ defm : TexPattern<6, TEX_LD, v4i32>;
defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
+defm : TexPattern<10, TEX_LDPTR, v4i32>;
//===----------------------------------------------------------------------===//
// Helper classes for common instructions
@@ -903,18 +924,22 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
class CNDE_Common <bits<5> inst> : R600_3OP <
inst, "CNDE",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
>;
class CNDGT_Common <bits<5> inst> : R600_3OP <
inst, "CNDGT",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
->;
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
+> {
+ let Itinerary = VecALU;
+}
class CNDGE_Common <bits<5> inst> : R600_3OP <
inst, "CNDGE",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
->;
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
+> {
+ let Itinerary = VecALU;
+}
let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
@@ -984,35 +1009,30 @@ multiclass CUBE_Common <bits<11> inst> {
class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
inst, "EXP_IEEE", fexp2
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "FLT_TO_INT", fp_to_sint
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "INT_TO_FLT", sint_to_fp
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "FLT_TO_UINT", fp_to_uint
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "UINT_TO_FLT", uint_to_fp
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
@@ -1023,7 +1043,6 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
inst, "LOG_IEEE", flog2
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
@@ -1033,75 +1052,68 @@ class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULHI_INT", mulhs
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULHI", mulhu
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULLO_INT", mul
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
inst, "RECIP_CLAMPED", []
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "RECIP_UINT", AMDGPUurecip
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
inst, "RECIPSQRT_IEEE", []
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class SIN_Common <bits<11> inst> : R600_1OP <
inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
let Trig = 1;
- let TransOnly = 1;
let Itinerary = TransALU;
}
class COS_Common <bits<11> inst> : R600_1OP <
inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
let Trig = 1;
- let TransOnly = 1;
let Itinerary = TransALU;
}
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//
@@ -1124,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
(exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;
+// FROUND pattern
+class FROUNDPat<Instruction CNDGE> : Pat <
+ (AMDGPUround f32:$x),
+ (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+>;
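// Informal reading of the pattern above, using the CNDGE selectcc semantics
// from CNDGE_Common (select the second operand when the first is >= 0):
//   round(x) = (fract(x) - 0.5 >= 0) ? ceil(x) : floor(x)
// e.g. x = 2.7: 0.7 - 0.5 >= 0, so CEIL gives 3.0;
//      x = 2.2: 0.2 - 0.5 <  0, so FLOOR gives 2.0.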
+
+
//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//
@@ -1165,11 +1184,12 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
+ def : FROUNDPat <CNDGE_r600>;
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
- let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{22} = 0; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
let Word1{31} = 1; // BARRIER
}
@@ -1178,7 +1198,7 @@ let Predicates = [isR600] in {
def R600_ExportBuf : ExportBufInst {
let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
- let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{22} = 0; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
let Word1{31} = 1; // BARRIER
}
@@ -1247,6 +1267,33 @@ let Predicates = [isR700] in {
}
//===----------------------------------------------------------------------===//
+// Evergreen / Cayman store instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
+ string name, list<dag> pattern>
+ : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
+ "MEM_RAT_CACHELESS "#name, pattern>;
+
+class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
+ list<dag> pattern>
+ : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
+ "MEM_RAT "#name, pattern>;
+
+def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+ "MSKOR $rw_gpr.XW, $index_gpr",
+ [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
+> {
+ let eop = 0;
+}
+
+} // End Predicates = [isEGorCayman]
+
+
+//===----------------------------------------------------------------------===//
// Evergreen Only instructions
//===----------------------------------------------------------------------===//
@@ -1274,36 +1321,32 @@ def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
-let usesCustomInserter = 1 in {
-class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> mask, string name,
- list<dag> pattern>
- : EG_CF_RAT <0x57, 0x2, mask, (outs), ins, name, pattern> {
-}
-
-} // End usesCustomInserter = 1
+let usesCustomInserter = 1 in {
// 32-bit store
-def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
+def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
(ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop",
+ "STORE_RAW $rw_gpr, $index_gpr, $eop",
[(global_store i32:$rw_gpr, i32:$index_gpr)]
>;
// 64-bit store
-def RAT_WRITE_CACHELESS_64_eg : RAT_WRITE_CACHELESS_eg <
+def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
(ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- 0x3, "RAT_WRITE_CACHELESS_64_eg $rw_gpr.XY, $index_gpr, $eop",
+ "STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
[(global_store v2i32:$rw_gpr, i32:$index_gpr)]
>;
//128-bit store
-def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
+def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop",
+ "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;
+} // End usesCustomInserter = 1
+
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
@@ -1508,7 +1551,6 @@ let hasSideEffects = 1 in {
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
let Pattern = [];
- let TransOnly = 0;
let Itinerary = AnyALU;
}
@@ -1600,29 +1642,83 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
let DisableEncoding = "$dst";
}
-class R600_LDS_1A1D <bits<6> lds_op, string name, list<dag> pattern> :
+class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
+ string dst =""> :
R600_LDS <
- lds_op,
- (outs),
+ lds_op, outs,
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel,
BANK_SWIZZLE:$bank_swizzle),
- " "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel",
+ " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
pattern
> {
+ field string BaseOp;
+
let src2 = 0;
let src2_rel = 0;
let LDS_1A1D = 1;
}
+class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A1D <lds_op, (outs), name, pattern> {
+ let BaseOp = name;
+}
+
+class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {
+
+ let BaseOp = name;
+ let usesCustomInserter = 1;
+ let DisableEncoding = "$dst";
+ let Defs = [OQAP];
+}
+
+class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS <
+ lds_op,
+ (outs),
+ (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
+ LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
+ " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
+ pattern> {
+ let LDS_1A2D = 1;
+}
+
+def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
+def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
+def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
+ [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
+>;
+def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
+ [(truncstorei8_local i32:$src1, i32:$src0)]
+>;
+def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
+ [(truncstorei16_local i32:$src1, i32:$src0)]
+>;
+def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
+ [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
+>;
+def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
+ [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
+>;
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
-
-def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
- [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
+def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
+ [(set i32:$dst, (sextloadi8_local i32:$src0))]
+>;
+def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
+ [(set i32:$dst, (az_extloadi8_local i32:$src0))]
+>;
+def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
+ [(set i32:$dst, (sextloadi16_local i32:$src0))]
+>;
+def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
+ [(set i32:$dst, (az_extloadi16_local i32:$src0))]
>;
// TRUNC is used for the FLT_TO_INT instructions to work around a
@@ -1642,9 +1738,11 @@ def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
// SHA-256 Patterns
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
+ def : FROUNDPat <CNDGE_eg>;
+
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
- let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{20} = 0; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
let Word1{30} = 0; // MARK
@@ -1654,7 +1752,7 @@ def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
def EG_ExportBuf : ExportBufInst {
let Word1{19-16} = 0; // BURST_COUNT
- let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{20} = 0; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
let Word1{30} = 0; // MARK
@@ -1771,23 +1869,17 @@ def : Pat <
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
-
-class RAT_STORE_DWORD_cm <bits<4> mask, dag ins, list<dag> pat> : EG_CF_RAT <
- 0x57, 0x14, mask, (outs), ins,
- "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", pat
-> {
+class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
+ CF_MEM_RAT_CACHELESS <0x14, 0, mask,
+ (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
+ "STORE_DWORD $rw_gpr, $index_gpr",
+ [(global_store vt:$rw_gpr, i32:$index_gpr)]> {
let eop = 0; // This bit is not used on Cayman.
}
-def RAT_STORE_DWORD32_cm : RAT_STORE_DWORD_cm <0x1,
- (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr),
- [(global_store i32:$rw_gpr, i32:$index_gpr)]
->;
-
-def RAT_STORE_DWORD64_cm : RAT_STORE_DWORD_cm <0x3,
- (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr),
- [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
->;
+def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
+def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
+def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;
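// The mask parameter appears to act as a per-channel write mask on the RAT
// store path: 0x1 writes .X only (i32), 0x3 writes .XY (v2i32), and 0xf
// writes .XYZW (v4i32), mirroring the Evergreen STORE_RAW definitions above.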
class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
@@ -2012,10 +2104,6 @@ def TXD_SHADOW: InstR600 <
} // End isPseudo = 1
} // End usesCustomInserter = 1
-def CLAMP_R600 : CLAMP <R600_Reg32>;
-def FABS_R600 : FABS<R600_Reg32>;
-def FNEG_R600 : FNEG<R600_Reg32>;
-
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
@@ -2164,7 +2252,7 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
- defm BRANCH_COND : BranchConditional<IL_brcond>;
+ defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
}
//===---------------------------------------------------------------------===//
@@ -2235,7 +2323,7 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>;
//CNDGE_INT extra pattern
def : Pat <
- (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT),
+ (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT),
(CNDGE_INT $src0, $src1, $src2)
>;
@@ -2250,86 +2338,6 @@ def KIL : Pat <
(MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;
-// SGT Reverse args
-def : Pat <
- (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
- (SGT $src1, $src0)
->;
-
-// SGE Reverse args
-def : Pat <
- (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
- (SGE $src1, $src0)
->;
-
-// SETGT_DX10 reverse args
-def : Pat <
- (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
- (SETGT_DX10 $src1, $src0)
->;
-
-// SETGE_DX10 reverse args
-def : Pat <
- (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
- (SETGE_DX10 $src1, $src0)
->;
-
-// SETGT_INT reverse args
-def : Pat <
- (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
- (SETGT_INT $src1, $src0)
->;
-
-// SETGE_INT reverse args
-def : Pat <
- (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
- (SETGE_INT $src1, $src0)
->;
-
-// SETGT_UINT reverse args
-def : Pat <
- (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
- (SETGT_UINT $src1, $src0)
->;
-
-// SETGE_UINT reverse args
-def : Pat <
- (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
- (SETGE_UINT $src1, $src0)
->;
-
-// The next two patterns are special cases for handling 'true if ordered' and
-// 'true if unordered' conditionals. The assumption here is that the behavior of
-// SETE and SNE conforms to the Direct3D 10 rules for floating point values
-// described here:
-// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
-// We assume that SETE returns false when one of the operands is NAN and
-// SNE returns true when one of the operands is NAN
-
-//SETE - 'true if ordered'
-def : Pat <
- (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
- (SETE $src0, $src1)
->;
-
-//SETE_DX10 - 'true if ordered'
-def : Pat <
- (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
- (SETE_DX10 $src0, $src1)
->;
-
-//SNE - 'true if unordered'
-def : Pat <
- (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
- (SNE $src0, $src1)
->;
-
-//SETNE_DX10 - 'true if unordered'
-def : Pat <
- (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
- (SETNE_DX10 $src0, $src1)
->;
-
def : Extract_Element <f32, v4f32, 0, sub0>;
def : Extract_Element <f32, v4f32, 1, sub1>;
def : Extract_Element <f32, v4f32, 2, sub2>;
@@ -2378,3 +2386,11 @@ def : BitConvert <v4i32, v4f32, R600_Reg128>;
def : DwordAddrPat <i32, R600_Reg32>;
} // End isR600toCayman Predicate
+
+def getLDSNoRetOp : InstrMapping {
+ let FilterClass = "R600_LDS_1A1D";
+ let RowFields = ["BaseOp"];
+ let ColFields = ["DisableEncoding"];
+ let KeyCol = ["$dst"];
+ let ValueCols = [[""""]];
+}
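// For reference: an InstrMapping like this typically makes TableGen emit a
// lookup named after the def (e.g. int AMDGPU::getLDSNoRetOp(uint16_t Opcode))
// that groups rows by BaseOp and maps the variant whose DisableEncoding is
// "$dst" (the *_RET form) to the variant with no disabled operands, e.g.
// LDS_ADD_RET -> LDS_ADD, LDS_SUB_RET -> LDS_SUB.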
diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td
index 58d86b6..9681747 100644
--- a/lib/Target/R600/R600Intrinsics.td
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -43,6 +43,12 @@ let TargetPrefix = "R600", isTarget = 1 in {
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_R600_interp_input :
Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_interp_const :
+ Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_R600_interp_xy :
+ Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+def int_R600_interp_zw :
+ Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_R600_load_texbuf :
Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_R600_tex : TextureIntrinsicFloatInput;
@@ -52,6 +58,7 @@ let TargetPrefix = "R600", isTarget = 1 in {
def int_R600_txb : TextureIntrinsicFloatInput;
def int_R600_txbc : TextureIntrinsicFloatInput;
def int_R600_txf : TextureIntrinsicInt32Input;
+ def int_R600_ldptr : TextureIntrinsicInt32Input;
def int_R600_txq : TextureIntrinsicInt32Input;
def int_R600_ddx : TextureIntrinsicFloatInput;
def int_R600_ddy : TextureIntrinsicFloatInput;
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
index 018b403..01105c6 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -12,7 +12,9 @@
using namespace llvm;
-R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
- : AMDGPUMachineFunction(MF) { }
+// Pin the vtable to this file.
+void R600MachineFunctionInfo::anchor() {}
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF) { }
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index f23d9b7..c1bec0a 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -21,6 +21,7 @@
namespace llvm {
class R600MachineFunctionInfo : public AMDGPUMachineFunction {
+ virtual void anchor();
public:
R600MachineFunctionInfo(const MachineFunction &MF);
SmallVector<unsigned, 4> LiveOuts;
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 0dc0365..da2a4d8 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -9,7 +9,6 @@
//
/// \file
/// \brief R600 Machine Scheduler interface
-// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//
@@ -29,6 +28,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
DAG = dag;
TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+ VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
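  // Presumably: non-Cayman parts use the 5-wide VLIW encoding with a fifth
  // "Trans" slot, while Cayman is 4-wide, hence the !hasCaymanISA() test above.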
MRI = &DAG->MRI;
CurInstKind = IDOther;
CurEmitted = 0;
@@ -92,15 +92,6 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
AllowSwitchFromAlu = true;
}
-
- // We want to scheduled AR defs as soon as possible to make sure they aren't
- // put in a different ALU clause from their uses.
- if (!SU && !UnscheduledARDefs.empty()) {
- SU = UnscheduledARDefs[0];
- UnscheduledARDefs.erase(UnscheduledARDefs.begin());
- NextInstKind = IDAlu;
- }
-
if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
(!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
// try to pick ALU
@@ -130,15 +121,6 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
NextInstKind = IDOther;
}
- // We want to schedule the AR uses as late as possible to make sure that
- // the AR defs have been released.
- if (!SU && !UnscheduledARUses.empty()) {
- SU = UnscheduledARUses[0];
- UnscheduledARUses.erase(UnscheduledARUses.begin());
- NextInstKind = IDAlu;
- }
-
-
DEBUG(
if (SU) {
dbgs() << " ** Pick node **\n";
@@ -217,20 +199,6 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
int IK = getInstKind(SU);
- // Check for AR register defines
- for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
- E = SU->getInstr()->operands_end();
- I != E; ++I) {
- if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
- if (I->isDef()) {
- UnscheduledARDefs.push_back(SU);
- } else {
- UnscheduledARUses.push_back(SU);
- }
- return;
- }
- }
-
// There is no export clause, we can schedule one as soon as its ready
if (IK == IDOther)
Available[IDOther].push_back(SU);
@@ -314,6 +282,10 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
return AluT_XYZW;
+ // LDS src registers cannot be used in the Trans slot.
+ if (TII->readsLDSSrcReg(MI))
+ return AluT_XYZW;
+
return AluAny;
}
@@ -342,14 +314,16 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
}
}
-SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
+SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
if (Q.empty())
return NULL;
for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
It != E; ++It) {
SUnit *SU = *It;
InstructionsGroupCandidate.push_back(SU->getInstr());
- if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
+ if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)
+ && (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
+ ) {
InstructionsGroupCandidate.pop_back();
Q.erase((It + 1).base());
return SU;
@@ -373,6 +347,8 @@ void R600SchedStrategy::PrepareNextSlot() {
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
+// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
+// OccupedSlotsMask |= 16;
InstructionsGroupCandidate.clear();
LoadAlu();
}
@@ -409,12 +385,12 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
}
}
-SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
- SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
+ SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
if (SlotedSU)
return SlotedSU;
- SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
+ SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
if (UnslotedSU)
AssignSlot(UnslotedSU->getInstr(), Slot);
return UnslotedSU;
@@ -434,30 +410,35 @@ SUnit* R600SchedStrategy::pickAlu() {
// Bottom up scheduling : predX must comes first
if (!AvailableAlus[AluPredX].empty()) {
OccupedSlotsMask |= 31;
- return PopInst(AvailableAlus[AluPredX]);
+ return PopInst(AvailableAlus[AluPredX], false);
}
// Flush physical reg copies (RA will discard them)
if (!AvailableAlus[AluDiscarded].empty()) {
OccupedSlotsMask |= 31;
- return PopInst(AvailableAlus[AluDiscarded]);
+ return PopInst(AvailableAlus[AluDiscarded], false);
}
// If there is a T_XYZW alu available, use it
if (!AvailableAlus[AluT_XYZW].empty()) {
OccupedSlotsMask |= 15;
- return PopInst(AvailableAlus[AluT_XYZW]);
+ return PopInst(AvailableAlus[AluT_XYZW], false);
}
}
bool TransSlotOccuped = OccupedSlotsMask & 16;
- if (!TransSlotOccuped) {
+ if (!TransSlotOccuped && VLIW5) {
if (!AvailableAlus[AluTrans].empty()) {
OccupedSlotsMask |= 16;
- return PopInst(AvailableAlus[AluTrans]);
+ return PopInst(AvailableAlus[AluTrans], false);
+ }
+ SUnit *SU = AttemptFillSlot(3, true);
+ if (SU) {
+ OccupedSlotsMask |= 16;
+ return SU;
}
}
for (int Chan = 3; Chan > -1; --Chan) {
bool isOccupied = OccupedSlotsMask & (1 << Chan);
if (!isOccupied) {
- SUnit *SU = AttemptFillSlot(Chan);
+ SUnit *SU = AttemptFillSlot(Chan, false);
if (SU) {
OccupedSlotsMask |= (1 << Chan);
InstructionsGroupCandidate.push_back(SU->getInstr());
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index f8965d8..97c8cde 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -53,8 +53,6 @@ class R600SchedStrategy : public MachineSchedStrategy {
std::vector<SUnit *> Available[IDLast], Pending[IDLast];
std::vector<SUnit *> AvailableAlus[AluLast];
- std::vector<SUnit *> UnscheduledARDefs;
- std::vector<SUnit *> UnscheduledARUses;
std::vector<SUnit *> PhysicalRegCopy;
InstKind CurInstKind;
@@ -84,15 +82,16 @@ public:
private:
std::vector<MachineInstr *> InstructionsGroupCandidate;
+ bool VLIW5;
int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
AluKind getAluKind(SUnit *SU) const;
void LoadAlu();
unsigned AvailablesAluCount() const;
- SUnit *AttemptFillSlot (unsigned Slot);
+ SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu);
void PrepareNextSlot();
- SUnit *PopInst(std::vector<SUnit*> &Q);
+ SUnit *PopInst(std::vector<SUnit*> &Q, bool AnyALU);
void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu();
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index acacffa..cf719c0 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -50,6 +50,9 @@ isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
E = MRI.def_end(); It != E; ++It) {
return (*It).isImplicitDef();
}
+ if (MRI.isReserved(Reg)) {
+ return false;
+ }
llvm_unreachable("Reg without a def");
return false;
}
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 5cf1fd3..cd9b6ea 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -58,6 +58,8 @@ class R600PacketizerList : public VLIWPacketizerList {
private:
const R600InstrInfo *TII;
const R600RegisterInfo &TRI;
+ bool VLIW5;
+ bool ConsideredInstUsesAlreadyWrittenVectorElement;
unsigned getSlot(const MachineInstr *MI) const {
return TRI.getHWRegChan(MI->getOperand(0).getReg());
@@ -74,7 +76,13 @@ private:
MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
if (I->isBundle())
BI++;
+ int LastDstChan = -1;
do {
+ bool isTrans = false;
+ int BISlot = getSlot(BI);
+ if (LastDstChan >= BISlot)
+ isTrans = true;
+ LastDstChan = BISlot;
if (TII->isPredicated(BI))
continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
@@ -85,7 +93,7 @@ private:
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
- if (TII->isTransOnly(BI)) {
+ if (isTrans || TII->isTransOnly(BI)) {
Result[Dst] = AMDGPU::PS;
continue;
}
@@ -142,10 +150,14 @@ public:
MachineDominatorTree &MDT)
: VLIWPacketizerList(MF, MLI, MDT, true),
TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
- TRI(TII->getRegisterInfo()) { }
+ TRI(TII->getRegisterInfo()) {
+ VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ }
// initPacketizerState - initialize some internal flags.
- void initPacketizerState() { }
+ void initPacketizerState() {
+ ConsideredInstUsesAlreadyWrittenVectorElement = false;
+ }
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
@@ -172,8 +184,8 @@ public:
// together.
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
- if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
- return false;
+ if (getSlot(MII) == getSlot(MIJ))
+ ConsideredInstUsesAlreadyWrittenVectorElement = true;
// Does MII and MIJ share the same pred_sel ?
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
@@ -194,6 +206,14 @@ public:
return false;
}
}
+
+ bool ARDef = TII->definesAddressRegister(MII) ||
+ TII->definesAddressRegister(MIJ);
+ bool ARUse = TII->usesAddressRegister(MII) ||
+ TII->usesAddressRegister(MIJ);
+ if (ARDef && ARUse)
+ return false;
+
return true;
}
@@ -211,6 +231,20 @@ public:
std::vector<R600InstrInfo::BankSwizzle> &BS,
bool &isTransSlot) {
isTransSlot = TII->isTransOnly(MI);
+ assert (!isTransSlot || VLIW5);
+
+ // Is the dst reg sequence legal ?
+ if (!isTransSlot && !CurrentPacketMIs.empty()) {
+ if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
+ if (ConsideredInstUsesAlreadyWrittenVectorElement &&
+ !TII->isVectorOnly(MI) && VLIW5) {
+ isTransSlot = true;
+ DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
+ }
+ else
+ return false;
+ }
+ }
// Are the Constants limitations met ?
CurrentPacketMIs.push_back(MI);
@@ -246,6 +280,10 @@ public:
return false;
}
+  // We cannot read LDS source registers from the Trans slot.
+ if (isTransSlot && TII->readsLDSSrcReg(MI))
+ return false;
+
CurrentPacketMIs.pop_back();
return true;
}
@@ -278,6 +316,8 @@ public:
return It;
}
endPacket(MI->getParent(), MI);
+ if (TII->isTransOnly(MI))
+ return MI;
return VLIWPacketizerList::addToPacket(MI);
}
};
@@ -308,7 +348,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator End = MBB->end();
MachineBasicBlock::iterator MI = MBB->begin();
while (MI != End) {
- if (MI->isKill() ||
+ if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
(MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
MachineBasicBlock::iterator DeleteMI = MI;
++MI;
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index a42043b..f3bb88b 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -28,6 +28,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm)
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
Reserved.set(AMDGPU::ONE);
@@ -41,26 +43,15 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::PRED_SEL_OFF);
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
+ Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
Reserved.set(*I);
}
- for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
- E = AMDGPU::TRegMemRegClass.end();
- I != E; ++I) {
- Reserved.set(*I);
- }
+ TII->reserveIndirectRegisters(Reserved, MF);
- const R600InstrInfo *RII =
- static_cast<const R600InstrInfo*>(TM.getInstrInfo());
- std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
- for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
- E = IndirectRegs.end();
- I != E; ++I) {
- Reserved.set(*I);
- }
return Reserved;
}
@@ -78,6 +69,10 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
+unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const {
+ return GET_REG_INDEX(getEncodingValue(Reg));
+}
+
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
@@ -86,17 +81,20 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
}
}
-unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
- switch (Channel) {
- default: assert(!"Invalid channel index"); return 0;
- case 0: return AMDGPU::sub0;
- case 1: return AMDGPU::sub1;
- case 2: return AMDGPU::sub2;
- case 3: return AMDGPU::sub3;
- }
-}
-
const RegClassWeight &R600RegisterInfo::getRegClassWeight(
const TargetRegisterClass *RC) const {
return RCW;
}
+
+bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg));
+
+ switch (Reg) {
+ case AMDGPU::OQAP:
+ case AMDGPU::OQBP:
+ case AMDGPU::AR_X:
+ return false;
+ default:
+ return true;
+ }
+}
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index 9b286ee..c74c49e 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -39,16 +39,16 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
+ virtual unsigned getHWRegIndex(unsigned Reg) const;
+
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
- /// \returns the sub reg enum value for the given \p Channel
- /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
- unsigned getSubRegFromChannel(unsigned Channel) const;
-
virtual const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
+  // \returns true if \p Reg can be defined in one ALU clause and used in another.
+ virtual bool isPhysRegLiveAcrossClauses(unsigned Reg) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index fa987cf..68bcd20 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -39,8 +39,6 @@ foreach Index = 0-127 in {
// Indirect addressing offset registers
def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
Index, Chan>;
- def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
- Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
@@ -95,6 +93,12 @@ foreach Index = 448-480 in {
// Special Registers
+def OQA : R600Reg<"OQA", 219>;
+def OQB : R600Reg<"OQB", 220>;
+def OQAP : R600Reg<"OQAP", 221>;
+def OQBP : R600Reg<"OQAP", 222>;
+def LDS_DIRECT_A : R600Reg<"LDS_DIRECT_A", 223>;
+def LDS_DIRECT_B : R600Reg<"LDS_DIRECT_B", 224>;
def ZERO : R600Reg<"0.0", 248>;
def ONE : R600Reg<"1.0", 249>;
def NEG_ONE : R600Reg<"-1.0", 249>;
@@ -115,7 +119,6 @@ def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
def AR_X : R600Reg<"AR.x", 0>;
-def OQAP : R600Reg<"OQAP", 221>;
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "ArrayBase%u", 448, 480))>;
@@ -130,7 +133,8 @@ let isAllocatable = 0 in {
// XXX: Only use the X channel, until we support wider stack widths
def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
-} // End isAllocatable = 0
+def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
+ (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "KC0_%u_X", 128, 159))>;
@@ -164,6 +168,8 @@ def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
(interleave R600_KC1_X, R600_KC1_Y,
R600_KC1_Z, R600_KC1_W)>;
+} // End isAllocatable = 0
+
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_X", 0, 127), AR_X)>;
@@ -184,6 +190,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
R600_ArrayBase,
R600_Addr,
+ R600_KC0, R600_KC1,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
ALU_CONST, ALU_PARAM, OQAP
)>;
@@ -201,33 +208,3 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
(add (sequence "T%u_XY", 0, 63))>;
-
-//===----------------------------------------------------------------------===//
-// Register classes for indirect addressing
-//===----------------------------------------------------------------------===//
-
-// Super register for all the Indirect Registers. This register class is used
-// by the REG_SEQUENCE instruction to specify the registers to use for direct
-// reads / writes which may be written / read by an indirect address.
-class IndirectSuper<string n, list<Register> subregs> :
- RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices =
- [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
- sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
-}
-
-def IndirectSuperReg : IndirectSuper<"Indirect",
- [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
- TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
- TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
->;
-
-def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
-
-// This register class defines the registers that are the storage units for
-// the "Indirect Addressing" pseudo memory space.
-// XXX: Only use the X channel, until we support wider stack widths
-def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add (sequence "TRegMem%u_X", 0, 16))
->;
diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
index 3768ba0..3258894 100644
--- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
@@ -35,9 +35,9 @@ class R600TextureIntrinsicsReplacer :
FunctionType *TexSign;
FunctionType *TexQSign;
- void getAdjustementFromTextureTarget(unsigned TextureType, bool hasLOD,
- unsigned SrcSelect[4], unsigned CT[4],
- bool &useShadowVariant) {
+ void getAdjustmentFromTextureTarget(unsigned TextureType, bool hasLOD,
+ unsigned SrcSelect[4], unsigned CT[4],
+ bool &useShadowVariant) {
enum TextureTypes {
TEXTURE_1D = 1,
TEXTURE_2D,
@@ -60,6 +60,7 @@ class R600TextureIntrinsicsReplacer :
switch (TextureType) {
case 0:
+ useShadowVariant = false;
return;
case TEXTURE_RECT:
case TEXTURE_1D:
@@ -93,9 +94,8 @@ class R600TextureIntrinsicsReplacer :
}
if (TextureType == TEXTURE_CUBE_ARRAY ||
- TextureType == TEXTURE_SHADOWCUBE_ARRAY) {
+ TextureType == TEXTURE_SHADOWCUBE_ARRAY)
CT[2] = 0;
- }
if (TextureType == TEXTURE_1D_ARRAY ||
TextureType == TEXTURE_SHADOW1D_ARRAY) {
@@ -114,9 +114,8 @@ class R600TextureIntrinsicsReplacer :
TextureType == TEXTURE_SHADOW2D ||
TextureType == TEXTURE_SHADOWRECT ||
TextureType == TEXTURE_SHADOW1D_ARRAY) &&
- !(hasLOD && useShadowVariant)) {
+ !(hasLOD && useShadowVariant))
SrcSelect[3] = 2;
- }
}
void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name,
@@ -174,8 +173,8 @@ class R600TextureIntrinsicsReplacer :
};
bool useShadowVariant;
- getAdjustementFromTextureTarget(TextureType, hasLOD, SrcSelect, CT,
- useShadowVariant);
+ getAdjustmentFromTextureTarget(TextureType, hasLOD, SrcSelect, CT,
+ useShadowVariant);
ReplaceCallInst(I, FT, useShadowVariant?ShadowInt:VanillaInt, SrcSelect,
Offset, ResourceId, SamplerId, CT, Coord);
@@ -198,8 +197,8 @@ class R600TextureIntrinsicsReplacer :
};
bool useShadowVariant;
- getAdjustementFromTextureTarget(TextureType, false, SrcSelect, CT,
- useShadowVariant);
+ getAdjustmentFromTextureTarget(TextureType, false, SrcSelect, CT,
+ useShadowVariant);
ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect,
Offset, ResourceId, SamplerId, CT, Coord);
@@ -259,6 +258,9 @@ public:
}
void visitCallInst(CallInst &I) {
+ if (!I.getCalledFunction())
+ return;
+
StringRef Name = I.getCalledFunction()->getName();
if (Name == "llvm.AMDGPU.tex") {
ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc");
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index 147578c..2cbce28 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -11,6 +11,18 @@
#ifndef SIDEFINES_H_
#define SIDEFINES_H_
+namespace SIInstrFlags {
+enum {
+ MIMG = 1 << 3,
+ SMRD = 1 << 4,
+ VOP1 = 1 << 5,
+ VOP2 = 1 << 6,
+ VOP3 = 1 << 7,
+ VOPC = 1 << 8,
+ SALU = 1 << 9
+};
+}
+
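// These values presumably mirror TSFlags bits set by the SI instruction
// format classes, so callers would test them roughly like:
//   if (TII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SMRD)
//     ; // treat MI as a scalar memory read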
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index 435172a..3370c79 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -23,9 +23,9 @@
/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+///
///
-///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
@@ -43,7 +43,7 @@
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is now forced to constrain the register class of %vreg3 to
/// <sgpr> so we end up with final code like this:
-///
+///
/// BB0:
/// %vreg0 <sgpr> = SCALAR_INST
/// ...
@@ -55,7 +55,7 @@
/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
-/// Now this code contains an illegal copy from a VGPR to an SGPR.
+/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// which define a <vsrc> register and constrains its definition class to
@@ -65,10 +65,14 @@
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "sgpr-copies"
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -79,9 +83,16 @@ class SIFixSGPRCopies : public MachineFunctionPass {
private:
static char ID;
- const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI,
+ const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
- unsigned Reg) const;
+ unsigned Reg,
+ unsigned SubReg) const;
+ const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI,
+ unsigned Reg,
+ unsigned SubReg) const;
+ bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI) const;
public:
SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
@@ -102,25 +113,41 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
return new SIFixSGPRCopies(tm);
}
-/// This functions walks the use/def chains starting with the definition of
-/// \p Reg until it finds an Instruction that isn't a COPY returns
-/// the register class of that instruction.
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
- const TargetRegisterInfo *TRI,
+static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+ continue;
+
+ if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
+ return true;
+ }
+ return false;
+}
+
+/// This function walks the use list of \p Reg until it finds an instruction
+/// that isn't a COPY and returns the register class of that instruction.
+/// \return The register class inferred from the first non-COPY use of \p Reg.
+const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
+ const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
- unsigned Reg) const {
+ unsigned Reg,
+ unsigned SubReg) const {
// The Reg parameter to the function must always be defined by either a PHI
// or a COPY, therefore it cannot be a physical register.
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Reg cannot be a physical register");
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ RC = TRI->getSubRegClass(RC, SubReg);
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
E = MRI.use_end(); I != E; ++I) {
switch (I->getOpcode()) {
case AMDGPU::COPY:
- RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI,
- I->getOperand(0).getReg()));
+ RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
+ I->getOperand(0).getReg(),
+ I->getOperand(0).getSubReg()));
break;
}
}
@@ -128,9 +155,48 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
return RC;
}
+const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI,
+ unsigned Reg,
+ unsigned SubReg) const {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
+ return TRI->getSubRegClass(RC, SubReg);
+ }
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def->getOpcode() != AMDGPU::COPY) {
+ return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
+ }
+
+ return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
+ Def->getOperand(1).getSubReg());
+}
+
+bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI) const {
+
+ unsigned DstReg = Copy.getOperand(0).getReg();
+ unsigned SrcReg = Copy.getOperand(1).getReg();
+ unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+ const TargetRegisterClass *SrcRC;
+
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ DstRC == &AMDGPU::M0RegRegClass)
+ return false;
+
+ SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg);
+ return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ MF.getTarget().getInstrInfo());
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -138,13 +204,58 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
- if (MI.getOpcode() != AMDGPU::PHI) {
- continue;
+ if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
+ DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
+ DEBUG(MI.print(dbgs()));
+ TII->moveToVALU(MI);
+
+ }
+
+ switch (MI.getOpcode()) {
+ default: continue;
+ case AMDGPU::PHI: {
+ DEBUG(dbgs() << " Fixing PHI:\n");
+ DEBUG(MI.print(dbgs()));
+
+ for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
+ unsigned Reg = MI.getOperand(i).getReg();
+ const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg,
+ MI.getOperand(0).getSubReg());
+ MRI.constrainRegClass(Reg, RC);
+ }
+ unsigned Reg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
+ MI.getOperand(0).getSubReg());
+ if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
+ MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
+ }
+
+ if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+ break;
+
+ // If a PHI node defines an SGPR and any of its operands are VGPRs,
+ // then we need to move it to the VALU.
+ for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
+ unsigned Reg = MI.getOperand(i).getReg();
+ if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
+ TII->moveToVALU(MI);
+ break;
+ }
+ }
+
+ break;
+ }
+ case AMDGPU::REG_SEQUENCE: {
+ if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
+ !hasVGPROperands(MI, TRI))
+ continue;
+
+ DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n");
+ DEBUG(MI.print(dbgs()));
+
+ TII->moveToVALU(MI);
+ break;
}
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg);
- if (RC == &AMDGPU::VSrc_32RegClass) {
- MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
}
}
}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index c64027f..d5d2b68 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -34,18 +34,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass);
- addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass);
-
- addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);
- addRegisterClass(MVT::v1i32, &AMDGPU::VSrc_32RegClass);
-
addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
@@ -62,6 +56,21 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
computeRegisterProperties();
+ // Condition Codes
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
@@ -69,6 +78,32 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
+ setOperationAction(ISD::ADDC, MVT::i32, Legal);
+ setOperationAction(ISD::ADDE, MVT::i32, Legal);
+
+ setOperationAction(ISD::BITCAST, MVT::i128, Legal);
+
+ // We need to custom lower vector stores from local memory
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::v8i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v16i32, Custom);
+
+ // We need to custom lower loads/stores from private memory
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i128, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -78,14 +113,31 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
setTargetDAGCombine(ISD::SELECT_CC);
@@ -102,24 +154,28 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
bool *IsFast) const {
// XXX: This depends on the address space and also we may want to revisit
// the alignment values we specify in the DataLayout.
+ if (!VT.isSimple() || VT == MVT::Other)
+ return false;
return VT.bitsGT(MVT::i32);
}
+bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
+ return VT.bitsLE(MVT::i16);
+}
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT,
+SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc DL, SDValue Chain,
unsigned Offset) const {
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_ADDRESS);
- EVT ArgVT = MVT::getIntegerVT(VT.getSizeInBits());
SDValue BasePtr = DAG.getCopyFromReg(Chain, DL,
MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(Offset, MVT::i64));
- return DAG.getLoad(VT, DL, Chain, Ptr,
- MachinePointerInfo(UndefValue::get(PtrTy)),
- false, false, false, ArgVT.getSizeInBits() >> 3);
+ return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
+ false, false, MemVT.getSizeInBits() >> 3);
}
@@ -146,7 +202,8 @@ SDValue SITargetLowering::LowerFormalArguments(
const ISD::InputArg &Arg = Ins[i];
// First check if it's a PS input addr
- if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
+ if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
+ !Arg.Flags.isByVal()) {
assert((PSInputNum <= 15) && "Too many PS inputs!");
@@ -177,7 +234,7 @@ SDValue SITargetLowering::LowerFormalArguments(
NewArg.PartOffset += NewArg.VT.getStoreSize();
}
- } else {
+ } else if (Info->ShaderType != ShaderType::COMPUTE) {
Splits.push_back(Arg);
}
}
@@ -200,6 +257,11 @@ SDValue SITargetLowering::LowerFormalArguments(
MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
}
+ if (Info->ShaderType == ShaderType::COMPUTE) {
+ getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
+ Splits);
+ }
+
AnalyzeFormalArguments(CCInfo, Splits);
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
@@ -214,9 +276,11 @@ SDValue SITargetLowering::LowerFormalArguments(
EVT VT = VA.getLocVT();
if (VA.isMemLoc()) {
+ VT = Ins[i].VT;
+ EVT MemVT = Splits[i].VT;
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, DL, DAG.getRoot(),
+ SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
36 + VA.getLocMemOffset());
InVals.push_back(Arg);
continue;
@@ -320,6 +384,19 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
+ case AMDGPU::SI_RegisterStorePseudo: {
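+    // Expand the pseudo into a real SI_RegisterStore: give it a fresh SReg_64
+    // def, copy the pseudo's operands across, then erase the pseudo.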
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
+ Reg);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ MI->eraseFromParent();
+ }
}
return BB;
}
@@ -335,6 +412,24 @@ MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
return MVT::i32;
}
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return false; /* There is V_MAD_F32 for f32 */
+ case MVT::f64:
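+    // Unlike f32, f64 has no MAD instruction, so a fused multiply-add is the
+    // better choice.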
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
@@ -344,9 +439,27 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::ADD: return LowerADD(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::LOAD: {
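+    // Vector loads from local or private memory are split into smaller pieces;
+    // the result is merged back with the original chain. Everything else is
+    // handled by LowerLOAD.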
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
+ Op.getValueType().isVector()) {
+ SDValue MergedValues[2] = {
+ SplitVectorLoad(Op, DAG),
+ Load->getChain()
+ };
+ return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
+ } else {
+ return LowerLOAD(Op, DAG);
+ }
+ }
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::ANY_EXTEND: // Fall-through
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: {
@@ -359,23 +472,23 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case Intrinsic::r600_read_ngroups_x:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 0);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0);
case Intrinsic::r600_read_ngroups_y:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 4);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4);
case Intrinsic::r600_read_ngroups_z:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 8);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8);
case Intrinsic::r600_read_global_size_x:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 12);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12);
case Intrinsic::r600_read_global_size_y:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 16);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16);
case Intrinsic::r600_read_global_size_z:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 20);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20);
case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 24);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24);
case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 28);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28);
case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 32);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32);
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
@@ -394,13 +507,102 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
AMDGPU::VGPR2, VT);
-
+ case AMDGPUIntrinsic::SI_load_const: {
+ SDValue Ops [] = {
+ ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(2)
+ };
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
+ VT.getSizeInBits() / 8, 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
+ Op->getVTList(), Ops, 2, VT, MMO);
+ }
+ case AMDGPUIntrinsic::SI_sample:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampleb:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampled:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
+ case AMDGPUIntrinsic::SI_samplel:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
+ case AMDGPUIntrinsic::SI_vs_load_input:
+ return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
+ ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(2),
+ Op.getOperand(3));
}
}
+
+ case ISD::INTRINSIC_VOID:
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_tbuffer_store: {
+ SDLoc DL(Op);
+ SDValue Ops [] = {
+ Chain,
+ ResourceDescriptorToi128(Op.getOperand(2), DAG),
+ Op.getOperand(3),
+ Op.getOperand(4),
+ Op.getOperand(5),
+ Op.getOperand(6),
+ Op.getOperand(7),
+ Op.getOperand(8),
+ Op.getOperand(9),
+ Op.getOperand(10),
+ Op.getOperand(11),
+ Op.getOperand(12),
+ Op.getOperand(13),
+ Op.getOperand(14)
+ };
+ EVT VT = Op.getOperand(3).getValueType();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getSizeInBits() / 8, 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
+ Op->getVTList(), Ops,
+ sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
+ }
+ default:
+ break;
+ }
}
return SDValue();
}
+SDValue SITargetLowering::LowerADD(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getValueType() != MVT::i64)
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
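+  // Split both operands into 32-bit halves, add the low halves with ADDC and
+  // the high halves with ADDE (consuming the carry), then reassemble the
+  // 64-bit result with BUILD_PAIR.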
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue One = DAG.getConstant(1, MVT::i32);
+
+ SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
+ SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);
+
+ SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero);
+ SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One);
+
+ SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue);
+
+ SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1);
+ SDValue Carry = AddLo.getValue(1);
+ SDValue AddHi = DAG.getNode(ISD::ADDE, DL, VTList, Hi0, Hi1, Carry);
+
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddLo, AddHi.getValue(0));
+}
+
/// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {
@@ -495,6 +697,53 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}
+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+
+ if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ return SDValue();
+
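+  // Private (scratch) loads are lowered to REGISTER_LOAD: keep the low 32 bits
+  // of the pointer and convert the byte address into a dword index.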
+ SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Load->getBasePtr(), DAG.getConstant(0, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+ DAG.getConstant(2, MVT::i32));
+
+ SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+ Load->getChain(), Ptr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
+ SDValue MergedValues[2] = {
+ Ret,
+ Load->getChain()
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+
+}
+
+SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ if (Op.getValueType() == MVT::i128) {
+ return Op;
+ }
+
+ assert(Op.getOpcode() == ISD::UNDEF);
+
+ return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
+ DAG.getConstant(0, MVT::i64),
+ DAG.getConstant(0, MVT::i64));
+}
+
+SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
+ const SDValue &Op,
+ SelectionDAG &DAG) const {
+ return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2),
+ ResourceDescriptorToi128(Op.getOperand(3), DAG),
+ Op.getOperand(4));
+}
+
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -529,6 +778,56 @@ SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op,
return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
}
+SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT VT = Store->getMemoryVT();
+
+ SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Ret.getNode())
+ return Ret;
+
+ if (VT.isVector() && VT.getVectorNumElements() >= 8)
+ return SplitVectorStore(Op, DAG);
+
+ if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ return SDValue();
+
+ SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue Chain = Store->getChain();
+ SmallVector<SDValue, 8> Values;
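+  // Break the value into 32-bit pieces; each piece is written with a
+  // REGISTER_STORE at consecutive dword offsets from Ptr.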
+
+ if (VT == MVT::i64) {
+ for (unsigned i = 0; i < 2; ++i) {
+ Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Store->getValue(), DAG.getConstant(i, MVT::i32)));
+ }
+ } else if (VT == MVT::i128) {
+ for (unsigned i = 0; i < 2; ++i) {
+ for (unsigned j = 0; j < 2; ++j) {
+ Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
+ Store->getValue(), DAG.getConstant(i, MVT::i32)),
+ DAG.getConstant(j, MVT::i32)));
+ }
+ }
+ } else {
+ Values.push_back(Store->getValue());
+ }
+
+ for (unsigned i = 0; i < Values.size(); ++i) {
+ SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
+ Ptr, DAG.getConstant(i, MVT::i32));
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Values[i], PartPtr,
+ DAG.getTargetConstant(0, MVT::i32));
+ }
+ return Chain;
+}
+
SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -555,7 +854,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::SELECT_CC: {
- N->dump();
ConstantSDNode *True, *False;
// i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
@@ -759,8 +1057,8 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
return;
}
- // This is a conservative aproach, it is possible that we can't determine
- // the correct register class and copy too often, but better save than sorry.
+  // This is a conservative approach. It is possible that we can't determine the
+ // correct register class and copy too often, but better safe than sorry.
SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
Operand.getValueType(), Operand, RC);
@@ -937,7 +1235,9 @@ static unsigned SubIdx2Lane(unsigned Idx) {
void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
SelectionDAG &DAG) const {
SDNode *Users[4] = { };
- unsigned Writemask = 0, Lane = 0;
+ unsigned Lane = 0;
+ unsigned OldDmask = Node->getConstantOperandVal(0);
+ unsigned NewDmask = 0;
// Try to figure out the used register components
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
@@ -948,29 +1248,42 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
return;
+ // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
+ // Note that subregs are packed, i.e. Lane==0 is the first bit set
+ // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
+ // set, etc.
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+ // Set which texture component corresponds to the lane.
+ unsigned Comp;
+ for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
+ assert(Dmask);
+ Comp = countTrailingZeros(Dmask);
+ Dmask &= ~(1 << Comp);
+ }
+
// Abort if we have more than one user per component
if (Users[Lane])
return;
Users[Lane] = *I;
- Writemask |= 1 << Lane;
+ NewDmask |= 1 << Comp;
}
- // Abort if all components are used
- if (Writemask == 0xf)
+ // Abort if there's no change
+ if (NewDmask == OldDmask)
return;
// Adjust the writemask in the node
std::vector<SDValue> Ops;
- Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
Ops.push_back(Node->getOperand(i));
Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
// If we only got one lane, replace it with a copy
- if (Writemask == (1U << Lane)) {
+ // (if NewDmask has only one bit set...)
+ if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
SDLoc(), Users[Lane]->getValueType(0),
@@ -1001,9 +1314,11 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
 /// \brief Fold the instructions after selecting them
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
Node = AdjustRegClass(Node, DAG);
- if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
+ if (TII->isMIMG(Node->getMachineOpcode()))
adjustWritemask(Node, DAG);
return foldOperands(Node, DAG);
@@ -1013,7 +1328,9 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ if (!TII->isMIMG(MI->getOpcode()))
return;
unsigned VReg = MI->getOperand(0).getReg();
@@ -1030,6 +1347,8 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
case 3: RC = &AMDGPU::VReg_96RegClass; break;
}
+ unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
+ MI->setDesc(TII->get(NewOpcode));
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MRI.setRegClass(VReg, RC);
}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index b4202c4..9933ece 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -21,13 +21,19 @@
namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering {
- SDValue LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL,
+ SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
SDValue Chain, unsigned Offset) const;
+ SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
+ SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
bool &ScalarSlotUsed) const;
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
@@ -44,6 +50,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
public:
SITargetLowering(TargetMachine &tm);
bool allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const;
+ virtual bool shouldSplitVectorElementType(EVT VT) const;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -55,6 +62,7 @@ public:
MachineBasicBlock * BB) const;
virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
virtual MVT getScalarShiftAmountTy(EVT VT) const;
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index ba202e3..7ef662e 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -134,14 +134,19 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
   // LGKM may use larger values
if (TSFlags & SIInstrFlags::LGKM_CNT) {
- MachineOperand &Op = MI.getOperand(0);
- if (!Op.isReg())
- Op = MI.getOperand(1);
- assert(Op.isReg() && "First LGKM operand must be a register!");
+ if (TII->isSMRD(MI.getOpcode())) {
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
- Result.Named.LGKM = Size > 4 ? 2 : 1;
+ MachineOperand &Op = MI.getOperand(0);
+ assert(Op.isReg() && "First LGKM operand must be a register!");
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+
+ } else {
+ // DS
+ Result.Named.LGKM = 1;
+ }
} else {
Result.Named.LGKM = 0;
@@ -181,7 +186,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
- if (!Op.isReg())
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
return std::make_pair(0, 0);
unsigned Reg = Op.getReg();
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 434aa7e..53ebaaf 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -17,10 +17,24 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> VM_CNT = 0;
field bits<1> EXP_CNT = 0;
field bits<1> LGKM_CNT = 0;
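+  // Instruction-format flags, mirrored into TSFlags below so that SIInstrInfo
+  // can classify opcodes (isMIMG, isSMRD, isVOP1/2/3/C, isSALUInstr).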
+ field bits<1> MIMG = 0;
+ field bits<1> SMRD = 0;
+ field bits<1> VOP1 = 0;
+ field bits<1> VOP2 = 0;
+ field bits<1> VOP3 = 0;
+ field bits<1> VOPC = 0;
+ field bits<1> SALU = 0;
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
let TSFlags{2} = LGKM_CNT;
+ let TSFlags{3} = MIMG;
+ let TSFlags{4} = SMRD;
+ let TSFlags{5} = VOP1;
+ let TSFlags{6} = VOP2;
+ let TSFlags{7} = VOP3;
+ let TSFlags{8} = VOPC;
+ let TSFlags{9} = SALU;
}
class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -55,6 +69,7 @@ class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -73,6 +88,7 @@ class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -90,6 +106,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -106,6 +123,7 @@ class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
@@ -123,6 +141,7 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
@@ -140,6 +159,7 @@ class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
let Inst{31-27} = 0x18; //encoding
let LGKM_CNT = 1;
+ let SMRD = 1;
}
//===----------------------------------------------------------------------===//
@@ -162,6 +182,8 @@ class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP1 = 1;
}
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -180,6 +202,8 @@ class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP2 = 1;
}
class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -208,6 +232,8 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP3 = 1;
}
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -234,6 +260,8 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP3 = 1;
}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
@@ -251,6 +279,7 @@ class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let VOPC = 1;
}
class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -414,6 +443,7 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
+ let MIMG = 1;
}
def EXP : Enc64<
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 551ae86..ab55c1b 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -15,10 +15,10 @@
#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include <stdio.h>
using namespace llvm;
@@ -31,6 +31,10 @@ const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
return RI;
}
+//===----------------------------------------------------------------------===//
+// TargetInstrInfo callbacks
+//===----------------------------------------------------------------------===//
+
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -118,14 +122,14 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg));
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
} else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
- AMDGPU::SReg_64RegClass.contains(SrcReg));
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_1;
@@ -136,19 +140,19 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
- AMDGPU::SReg_128RegClass.contains(SrcReg));
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_3;
} else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
- AMDGPU::SReg_256RegClass.contains(SrcReg));
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_7;
} else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
- AMDGPU::SReg_512RegClass.contains(SrcReg));
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_15;
@@ -168,7 +172,6 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
-
int NewOpc;
// Try to map original to commuted opcode
@@ -185,11 +188,36 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
- if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
- !MI->getOperand(2).isReg())
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
return 0;
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ // Cannot commute VOP2 if src0 is SGPR.
+ if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
+ return 0;
+
+ if (!MI->getOperand(2).isReg()) {
+ // XXX: Commute instructions with FPImm operands
+ if (NewMI || MI->getOperand(2).isFPImm() ||
+ (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
+ return 0;
+ }
+
+ // XXX: Commute VOP3 instructions with abs and neg set.
+ if (isVOP3(MI->getOpcode()) &&
+ (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::abs)).getImm() ||
+ MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::neg)).getImm()))
+ return 0;
+
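+    // Operand 2 is an immediate: swap it with the register in operand 1 by
+    // hand, since the generic commuteInstruction only swaps register operands.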
+ unsigned Reg = MI->getOperand(1).getReg();
+ MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
+ MI->getOperand(2).ChangeToRegister(Reg, false);
+ } else {
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
if (MI)
MI->setDesc(get(commuteOpcode(MI->getOpcode())));
@@ -197,15 +225,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
return MI;
}
-MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const {
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc());
- MachineInstrBuilder MIB(*MF, MI);
- MIB.addReg(DstReg, RegState::Define);
- MIB.addImm(Imm);
-
- return MI;
-
+MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ unsigned SrcReg) const {
+ return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
+ DstReg) .addReg(SrcReg);
}
bool SIInstrInfo::isMov(unsigned Opcode) const {
@@ -224,32 +249,397 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return RC != &AMDGPU::EXECRegRegClass;
}
-//===----------------------------------------------------------------------===//
-// Indirect addressing callbacks
-//===----------------------------------------------------------------------===//
+int SIInstrInfo::isMIMG(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MIMG;
+}
-unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const {
- assert(Channel == 0);
- return RegIndex;
+int SIInstrInfo::isSMRD(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SMRD;
+}
+
+bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP1;
+}
+
+bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP2;
+}
+
+bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3;
+}
+
+bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOPC;
+}
+
+bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
+}
+
+bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
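+  // Inline constants are encoded directly in the instruction: integers in
+  // [-16, 64] and the small set of floating-point values checked below.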
+  if (MO.isImm()) {
+ return MO.getImm() >= -16 && MO.getImm() <= 64;
+ }
+ if (MO.isFPImm()) {
+ return MO.getFPImm()->isExactlyValue(0.0) ||
+ MO.getFPImm()->isExactlyValue(0.5) ||
+ MO.getFPImm()->isExactlyValue(-0.5) ||
+ MO.getFPImm()->isExactlyValue(1.0) ||
+ MO.getFPImm()->isExactlyValue(-1.0) ||
+ MO.getFPImm()->isExactlyValue(2.0) ||
+ MO.getFPImm()->isExactlyValue(-2.0) ||
+ MO.getFPImm()->isExactlyValue(4.0) ||
+ MO.getFPImm()->isExactlyValue(-4.0);
+ }
+ return false;
+}
+
+bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
+ return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
+}
+
+bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ uint16_t Opcode = MI->getOpcode();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ // Verify VOP*
+ if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
+ unsigned ConstantBusCount = 0;
+ unsigned SGPRUsed = AMDGPU::NoRegister;
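+    // A VALU instruction may read the constant bus (SGPRs, M0, VCC, EXEC or a
+    // literal) at most once; count all such readers.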
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+
+ // EXEC register uses the constant bus.
+ if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
+ ++ConstantBusCount;
+
+ // SGPRs use the constant bus
+ if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
+ (!MO.isImplicit() &&
+ (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
+ AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
+ if (SGPRUsed != MO.getReg()) {
+ ++ConstantBusCount;
+ SGPRUsed = MO.getReg();
+ }
+ }
+ }
+ // Literal constants use the constant bus.
+ if (isLiteralConstant(MO))
+ ++ConstantBusCount;
+ }
+ if (ConstantBusCount > 1) {
+ ErrInfo = "VOP* instruction uses the constant bus more than once";
+ return false;
+ }
+ }
+
+ // Verify SRC1 for VOP2 and VOPC
+ if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
+ const MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ if (Src1.isImm() || Src1.isFPImm()) {
+ ErrInfo = "VOP[2C] src1 cannot be an immediate.";
+ return false;
+ }
+ }
+
+ // Verify VOP3
+ if (isVOP3(Opcode)) {
+ if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
+ ErrInfo = "VOP3 src0 cannot be a literal constant.";
+ return false;
+ }
+ if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
+ ErrInfo = "VOP3 src1 cannot be a literal constant.";
+ return false;
+ }
+ if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
+ ErrInfo = "VOP3 src2 cannot be a literal constant.";
+ return false;
+ }
+ }
+ return true;
+}
+
+unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default: return AMDGPU::INSTRUCTION_LIST_END;
+ case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
+ case AMDGPU::COPY: return AMDGPU::COPY;
+ case AMDGPU::PHI: return AMDGPU::PHI;
+ case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
+ case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
+ case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
+ case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
+ case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
+ case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
+ case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
+ case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
+ case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
+ case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
+ }
+}
+
+bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
+ return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
+}
+
+const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &Desc = get(MI.getOpcode());
+ if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
+ Desc.OpInfo[OpNo].RegClass == -1)
+ return MRI.getRegClass(MI.getOperand(OpNo).getReg());
+
+ unsigned RCID = Desc.OpInfo[OpNo].RegClass;
+ return RI.getRegClass(RCID);
+}
+
+bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::REG_SEQUENCE:
+ return RI.hasVGPRs(getOpRegClass(MI, 0));
+ default:
+ return RI.hasVGPRs(getOpRegClass(MI, OpNo));
+ }
}
+void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
+ const TargetRegisterClass *RC = RI.getRegClass(RCID);
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (MO.isReg()) {
+ Opcode = AMDGPU::COPY;
+ } else if (RI.isSGPRClass(RC)) {
+ Opcode = AMDGPU::S_MOV_B32;
+ }
+
+ const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
+ unsigned Reg = MRI.createVirtualRegister(VRC);
+ BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
+ Reg).addOperand(MO);
+ MO.ChangeToRegister(Reg, false);
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src2);
+
+ // Legalize VOP2
+ if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+
+ // If the instruction implicitly reads VCC, we can't have any SGPR operands,
+    // so move any SGPR operand to a VGPR.
+ bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
+ if (ReadsVCC && Src0.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
+ legalizeOpWithMove(MI, Src0Idx);
+ return;
+ }
+
+ if (ReadsVCC && Src1.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
+ // be the first operand, and there can only be one.
+ if (Src1.isImm() || Src1.isFPImm() ||
+ (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
+ if (MI->isCommutable()) {
+ if (commuteInstruction(MI))
+ return;
+ }
+ legalizeOpWithMove(MI, Src1Idx);
+ }
+ }
+
+ // XXX - Do any VOP3 instructions read VCC?
+ // Legalize VOP3
+ if (isVOP3(MI->getOpcode())) {
+ int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
+ unsigned SGPRReg = AMDGPU::NoRegister;
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ continue;
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ if (MO.isReg()) {
+ if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ continue; // VGPRs are legal
+
+ assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
+
+ if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
+ SGPRReg = MO.getReg();
+ // We can use one SGPR in each VOP3 instruction.
+ continue;
+ }
+ } else if (!isLiteralConstant(MO)) {
+ // If it is not a register and not a literal constant, then it must be
+ // an inline constant which is always legal.
+ continue;
+ }
+ // If we make it this far, then the operand is not legal and we must
+ // legalize it.
+ legalizeOpWithMove(MI, Idx);
+ }
+ }
+
+ // Legalize REG_SEQUENCE
+  // The register class of the operands must be the same type as the register
+ // class of the output.
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ const TargetRegisterClass *OpRC =
+ MRI.getRegClass(MI->getOperand(i).getReg());
+ if (RI.hasVGPRs(OpRC)) {
+ VRC = OpRC;
+ } else {
+ SRC = OpRC;
+ }
+ }
+
+    // If any of the operands are VGPR registers, then they all must be
+    // VGPRs, otherwise we will create illegal VGPR->SGPR copies when
+    // legalizing them.
+ if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
+ if (!VRC) {
+ assert(SRC);
+ VRC = RI.getEquivalentVGPRClass(SRC);
+ }
+ RC = VRC;
+ } else {
+ RC = SRC;
+ }
-int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- llvm_unreachable("Unimplemented");
+ // Update all the operands so they have the same type.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ unsigned DstReg = MRI.createVirtualRegister(RC);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(AMDGPU::COPY), DstReg)
+ .addOperand(MI->getOperand(i));
+ MI->getOperand(i).setReg(DstReg);
+ }
+ }
}
-int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- llvm_unreachable("Unimplemented");
+void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
+ SmallVector<MachineInstr *, 128> Worklist;
+ Worklist.push_back(&TopInst);
+
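+  // Worklist-driven conversion: rewrite each instruction to its VALU
+  // equivalent, then queue any user that cannot read the new VGPR result so
+  // it gets moved as well.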
+ while (!Worklist.empty()) {
+ MachineInstr *Inst = Worklist.pop_back_val();
+ unsigned NewOpcode = getVALUOp(*Inst);
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+ continue;
+
+ MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
+
+ // Use the new VALU Opcode.
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ Inst->setDesc(NewDesc);
+
+ // Remove any references to SCC. Vector instructions can't read from it, and
+    // we're just about to add the implicit use / defs of VCC, and we don't want
+ // both.
+ for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
+ MachineOperand &Op = Inst->getOperand(i);
+ if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
+ Inst->RemoveOperand(i);
+ }
+
+    // Add the implicit register uses and defs required by the new opcode.
+ if (NewDesc.ImplicitUses) {
+ for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
+ unsigned Reg = NewDesc.ImplicitUses[i];
+ Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
+ }
+ }
+
+ if (NewDesc.ImplicitDefs) {
+ for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
+ unsigned Reg = NewDesc.ImplicitDefs[i];
+ Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ legalizeOperands(Inst);
+
+ // Update the destination register class.
+ const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
+
+ switch (Inst->getOpcode()) {
+ // For target instructions, getOpRegClass just returns the virtual
+ // register class associated with the operand, so we need to find an
+ // equivalent VGPR register class in order to move the instruction to the
+ // VALU.
+ case AMDGPU::COPY:
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ if (RI.hasVGPRs(NewDstRC))
+ continue;
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ unsigned DstReg = Inst->getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+ E = MRI.use_end(); I != E; ++I) {
+ MachineInstr &UseMI = *I;
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ Worklist.push_back(&UseMI);
+ }
+ }
+ }
}
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
- unsigned SourceReg) const {
- llvm_unreachable("Unimplemented");
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ assert(Channel == 0);
+ return RegIndex;
}
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
- llvm_unreachable("Unimplemented");
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
+ return &AMDGPU::VReg_32RegClass;
}
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
@@ -257,7 +647,17 @@ MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
- llvm_unreachable("Unimplemented");
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
+ .addReg(IndirectBaseReg, RegState::Define)
+ .addOperand(I->getOperand(0))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0)
+ .addReg(ValueReg);
}
MachineInstrBuilder SIInstrInfo::buildIndirectRead(
@@ -265,9 +665,43 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
- llvm_unreachable("Unimplemented");
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
+ .addOperand(I->getOperand(0))
+ .addOperand(I->getOperand(1))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0);
+
}
-const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
- llvm_unreachable("Unimplemented");
+void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const {
+ int End = getIndirectIndexEnd(MF);
+ int Begin = getIndirectIndexBegin(MF);
+
+ if (End == -1)
+ return;
+
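+  // Reserve the VReg_32 range [Begin, End] used for indirect addressing, plus
+  // every wider register tuple that overlaps it.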
+ for (int Index = Begin; Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 87eff4d..4af6348 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
+//===-- SIInstrInfo.h - SI Instruction Info Interface -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,14 @@ class SIInstrInfo : public AMDGPUInstrInfo {
private:
const SIRegisterInfo RI;
+ MachineInstrBuilder buildIndirectIndexLoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned OffsetVGPR,
+ unsigned MovRelOp,
+ unsigned Dst,
+ unsigned Src0) const;
+ // If you add or remove instructions from this function, you will
+
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
@@ -40,25 +48,65 @@ public:
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI=false) const;
- virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const;
-
virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
+ MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const;
virtual bool isMov(unsigned Opcode) const;
virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+ int isMIMG(uint16_t Opcode) const;
+ int isSMRD(uint16_t Opcode) const;
+ bool isVOP1(uint16_t Opcode) const;
+ bool isVOP2(uint16_t Opcode) const;
+ bool isVOP3(uint16_t Opcode) const;
+ bool isVOPC(uint16_t Opcode) const;
+ bool isInlineConstant(const MachineOperand &MO) const;
+ bool isLiteralConstant(const MachineOperand &MO) const;
+
+ virtual bool verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const;
+
+ bool isSALUInstr(const MachineInstr &MI) const;
+ static unsigned getVALUOp(const MachineInstr &MI);
+ bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
+
+ /// \brief Return the correct register class for \p OpNo. For target-specific
+ /// instructions, this will return the register class that has been defined
+ /// in tablegen. For generic instructions, like REG_SEQUENCE it will return
+  /// the register class of its machine operand, which can be used to infer
+  /// the correct register class based on the other operands.
+ const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
+                                           unsigned OpNo) const;
+
+ /// \returns true if it is legal for the operand at index \p OpNo
+ /// to read a VGPR.
+ bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
+
+ /// \brief Legalize the \p OpIndex operand of this instruction by inserting
+ /// a MOV. For example:
+ /// ADD_I32_e32 VGPR0, 15
+ /// to
+ /// MOV VGPR1, 15
+ /// ADD_I32_e32 VGPR0, VGPR1
+ ///
+ /// If the operand being legalized is a register, then a COPY will be used
+ /// instead of MOV.
+ void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const;
+
+ /// \brief Legalize all operands in this instruction. This function may
+  /// create new instructions and insert them before \p MI.
+ void legalizeOperands(MachineInstr *MI) const;
+
+ /// \brief Replace this instruction's opcode with the equivalent VALU
+ /// opcode. This function will also move the users of \p MI to the
+ /// VALU if necessary.
+ void moveToVALU(MachineInstr &MI) const;
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const;
- virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
- unsigned SourceReg) const;
-
- virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+ virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
@@ -71,16 +119,18 @@ public:
unsigned ValueReg,
unsigned Address,
unsigned OffsetReg) const;
+ void reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const;
- virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
- };
+ void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
+ unsigned SavReg, unsigned IndexReg) const;
+};
namespace AMDGPU {
int getVOPe64(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
- int isMIMG(uint16_t Opcode);
} // End namespace AMDGPU
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 52af79c..4cd0daa 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -16,6 +16,45 @@ def SIadd64bit32bit : SDNode<"ISD::ADD",
SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
>;
+def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
+ [SDNPMayLoad, SDNPMemOperand]
+>;
+
+def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
+ SDTypeProfile<0, 13,
+ [SDTCisVT<0, i128>, // rsrc(SGPR)
+ SDTCisVT<1, iAny>, // vdata(VGPR)
+ SDTCisVT<2, i32>, // num_channels(imm)
+ SDTCisVT<3, i32>, // vaddr(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // inst_offset(imm)
+ SDTCisVT<6, i32>, // dfmt(imm)
+ SDTCisVT<7, i32>, // nfmt(imm)
+ SDTCisVT<8, i32>, // offen(imm)
+ SDTCisVT<9, i32>, // idxen(imm)
+ SDTCisVT<10, i32>, // glc(imm)
+ SDTCisVT<11, i32>, // slc(imm)
+ SDTCisVT<12, i32> // tfe(imm)
+ ]>,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
+ SDTCisVT<3, i32>]>
+>;
+
+class SDSample<string opcode> : SDNode <opcode,
+ SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
+ SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
+>;
+
+def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
+def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
+def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
+def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
+
// Transformation function, extract the lower 32bit of a 64bit immediate
def LO32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
@@ -45,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf <
}]>
>;
+def as_i1imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
+}]>;
+
+def as_i8imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
+}]>;
+
def as_i16imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
}]>;
@@ -58,6 +105,26 @@ class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
(*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0;
}]>;
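+// An immediate that has at least one use in an SGPR operand, and is therefore
+// worth materializing in a scalar register.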
+class SGPRImm <dag frag> : PatLeaf<frag, [{
+ if (TM.getSubtarget<AMDGPUSubtarget>().getGeneration() <
+ AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return false;
+ }
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ U != E; ++U) {
+ if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
+ return true;
+ }
+ }
+ return false;
+}]>;
+
+def FRAMEri64 : Operand<iPTR> {
+ let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index);
+}
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@@ -109,6 +176,11 @@ class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
opName#" $dst, $src0, $src1", pattern
>;
+class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
@@ -184,6 +256,12 @@ multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
: VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+multiclass VOP1_32_64 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_32, VSrc_64, opName, pattern>;
+
+multiclass VOP1_64_32 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_64, VSrc_32, opName, pattern>;
+
multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern, string revOp> {
def _e32 : VOP2 <
@@ -320,6 +398,18 @@ class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
let vdst = 0;
}
+class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS <
+ op,
+ (outs rc:$vdst),
+ (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i8imm:$offset0,
+ i8imm:$offset1),
+ asm#" $gds, $vdst, $addr, $data0, $offset0, $offset1, [M0]",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 1;
+ let data1 = 0;
+}
+
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
@@ -358,9 +448,9 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
}
}
-class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
- ValueType VT> :
- MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
+class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
+ MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+ i16imm:$offset),
name#" $vdata, $srsrc + $vaddr + $offset",
[]> {
@@ -391,11 +481,18 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF
let mayStore = 0;
}
-class MIMG_NoSampler_Helper <bits<7> op, string asm> : MIMG <
+class MIMG_Mask <string op, int channels> {
+ string Op = op;
+ int Channels = channels;
+}
+
+class MIMG_NoSampler_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc> : MIMG <
op,
- (outs VReg_128:$vdata),
+ (outs dst_rc:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
SReg_256:$srsrc),
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc",
@@ -406,11 +503,31 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm> : MIMG <
let hasPostISelHook = 1;
}
-class MIMG_Sampler_Helper <bits<7> op, string asm> : MIMG <
+multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+}
+
+multiclass MIMG_NoSampler <bits<7> op, string asm> {
+ defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VReg_32, 1>;
+ defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
+}
+
+class MIMG_Sampler_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc> : MIMG <
op,
- (outs VReg_128:$vdata),
+ (outs dst_rc:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
SReg_256:$srsrc, SReg_128:$ssamp),
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
@@ -420,6 +537,28 @@ class MIMG_Sampler_Helper <bits<7> op, string asm> : MIMG <
let hasPostISelHook = 1;
}
+multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Sampler <bits<7> op, string asm> {
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VReg_32, 1>;
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
+}
+
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
@@ -442,6 +581,14 @@ def getCommuteRev : InstrMapping {
let ValueCols = [["0"]];
}
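+// Maps a MIMG opcode that writes all four channels (dmask key "4") to the
+// equivalent opcode writing only 1, 2 or 3 channels.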
+def getMaskedMIMGOp : InstrMapping {
+ let FilterClass = "MIMG_Mask";
+ let RowFields = ["Op"];
+ let ColFields = ["Channels"];
+ let KeyCol = ["4"];
+ let ValueCols = [["1"], ["2"], ["3"] ];
+}
+
 // Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "VOP2_REV";
@@ -451,13 +598,4 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
-// Test if the supplied opcode is an MIMG instruction
-def isMIMG : InstrMapping {
- let FilterClass = "MIMG";
- let RowFields = ["Inst"];
- let ColFields = ["Size"];
- let KeyCol = ["8"];
- let ValueCols = [["8"]];
-}
-
include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 500d15e..76f05eb 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -23,7 +23,9 @@ def InterpSlot : Operand<i32> {
}
def isSI : Predicate<"Subtarget.getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+
+def WAIT_FLAG : InstFlag<"printWaitFlag">;
let Predicates = [isSI] in {
@@ -126,8 +128,11 @@ def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
} // End isCompare = 1
-def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
-def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+let Defs = [SCC], isCommutable = 1 in {
+ def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
+ def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+}
+
//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
@@ -138,19 +143,19 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
let isCompare = 1 in {
defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
-defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>;
-defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>;
-defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>;
-defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>;
-defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>;
-defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>;
-defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">;
-defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_OLT>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_OEQ>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_OLE>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_OGT>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32">;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_OGE>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", f32, COND_O>;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", f32, COND_UO>;
defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
-defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
@@ -176,19 +181,19 @@ defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
-defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_LT>;
-defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_EQ>;
-defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_LE>;
-defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_GT>;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_OEQ>;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_OLE>;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_OGT>;
defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
-defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_GE>;
-defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">;
-defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_OGE>;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", f64, COND_O>;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", f64, COND_UO>;
defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
-defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_NE>;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
@@ -290,12 +295,12 @@ defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
-defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_SLT>;
defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>;
-defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_SLE>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_SGT>;
defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
-defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>;
defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
let hasSideEffects = 1, Defs = [EXEC] in {
@@ -312,12 +317,12 @@ defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
-defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">;
-defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">;
-defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">;
-defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">;
-defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">;
-defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", i64, COND_EQ>;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", i64, COND_SLE>;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", i64, COND_SGT>;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>;
defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
let hasSideEffects = 1, Defs = [EXEC] in {
@@ -334,12 +339,12 @@ defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
-defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">;
-defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">;
-defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">;
-defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">;
-defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">;
-defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", i32, COND_EQ>;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", i32, COND_ULE>;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", i32, COND_UGT>;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>;
defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
let hasSideEffects = 1, Defs = [EXEC] in {
@@ -356,12 +361,12 @@ defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
-defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">;
-defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">;
-defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">;
-defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">;
-defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">;
-defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", i64, COND_EQ>;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", i64, COND_ULE>;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", i64, COND_UGT>;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>;
defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
let hasSideEffects = 1, Defs = [EXEC] in {
@@ -391,8 +396,16 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
} // End isCompare = 1
+def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
+def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
+def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
+def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
+def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
+def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
+def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
+def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
@@ -409,19 +422,25 @@ defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", V
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
-//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
-//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
+
+def BUFFER_STORE_BYTE : MUBUF_Store_Helper <
+ 0x00000018, "BUFFER_STORE_BYTE", VReg_32
+>;
+
+def BUFFER_STORE_SHORT : MUBUF_Store_Helper <
+ 0x0000001a, "BUFFER_STORE_SHORT", VReg_32
+>;
def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32
+ 0x0000001c, "BUFFER_STORE_DWORD", VReg_32
>;
def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64
+ 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64
>;
def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32
+ 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
@@ -463,21 +482,24 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
-//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
-//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
-//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
+def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
+def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
+def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
+def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
let mayLoad = 1 in {
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SGPR_32 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
@@ -500,8 +522,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
-//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
-def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">;
//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
@@ -510,7 +532,7 @@ def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">;
//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "IMAGE_GET_RESINFO">;
//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
@@ -528,20 +550,20 @@ def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">;
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">;
+defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">;
//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
-def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">;
//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
-def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">;
-def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">;
+defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">;
//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
-def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">;
+defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">;
//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
-def IMAGE_SAMPLE_C_D : MIMG_Sampler_Helper <0x0000002a, "IMAGE_SAMPLE_C_D">;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">;
//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
-def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
-def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
@@ -603,15 +625,21 @@ defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
} // End neverHasSideEffects = 1, isMoveImm = 1
defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
-//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
-//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
+defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
+ [(set i32:$dst, (fp_to_sint f64:$src0))]
+>;
+defm V_CVT_F64_I32 : VOP1_64_32 <0x00000004, "V_CVT_F64_I32",
+ [(set f64:$dst, (sint_to_fp i32:$src0))]
+>;
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
[(set f32:$dst, (sint_to_fp i32:$src0))]
>;
defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32",
[(set f32:$dst, (uint_to_fp i32:$src0))]
>;
-defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32",
+ [(set i32:$dst, (fp_to_uint f32:$src0))]
+>;
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
[(set i32:$dst, (fp_to_sint f32:$src0))]
>;
@@ -621,8 +649,12 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
-//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
-//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
+defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64",
+ [(set f32:$dst, (fround f64:$src0))]
+>;
+defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
+ [(set f64:$dst, (fextend f32:$src0))]
+>;
//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
@@ -791,7 +823,7 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
let mayStore = 1;
}
-def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
[]
>;
} // End hasSideEffects
@@ -827,6 +859,11 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
+def : Pat <
+ (i32 (trunc i64:$val)),
+ (EXTRACT_SUBREG $val, sub0)
+>;
+
//use two V_CNDMASK_B32_e64 instructions for f64
def : Pat <
(f64 (select i1:$src2, f64:$src1, f64:$src0)),
@@ -910,9 +947,13 @@ defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
+let hasPostISelHook = 1 in {
+
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
+
+}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
@@ -936,16 +977,13 @@ defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
- [(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
->;
-
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
- [(set i32:$dst, (sub i32:$src0, i32:$src1))]
->;
+// No patterns so that the scalar instructions are always selected.
+// The scalar versions will be replaced with vector ones when needed later.
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>;
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
-let Uses = [VCC] in { // Carry-out comes from VCC
+let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
@@ -999,8 +1037,12 @@ def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
defm : BFIPatterns <V_BFI_B32>;
-def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
-def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
+ [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
+>;
+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64",
+ [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
+>;
//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
def : ROTRPattern <V_ALIGNBIT_B32>;
@@ -1087,12 +1129,31 @@ def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+
+let Defs = [SCC] in { // Carry out goes to SCC
+let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32",
+ [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
+>;
+} // End isCommutable = 1
+
def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
-def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
-def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
-def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
-def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32",
+ [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
+>;
+
+let Uses = [SCC] in { // Carry in comes from SCC
+let isCommutable = 1 in {
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32",
+ [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+} // End isCommutable = 1
+
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
+ [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+} // End Uses = [SCC]
+} // End Defs = [SCC]
+
def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
@@ -1124,7 +1185,9 @@ def : Pat <
(S_OR_B64 $src0, $src1)
>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
-def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
+ [(set i1:$dst, (xor i1:$src0, i1:$src1))]
+>;
def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
@@ -1135,12 +1198,31 @@ def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
-def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
-def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
-def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
-def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
-def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
-def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+
+// Use added complexity so these patterns are preferred to the VALU patterns.
+let AddedComplexity = 1 in {
+
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
+ [(set i32:$dst, (shl i32:$src0, i32:$src1))]
+>;
+def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
+ [(set i64:$dst, (shl i64:$src0, i32:$src1))]
+>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
+ [(set i32:$dst, (srl i32:$src0, i32:$src1))]
+>;
+def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
+ [(set i64:$dst, (srl i64:$src0, i32:$src1))]
+>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
+ [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+>;
+def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
+ [(set i64:$dst, (sra i64:$src0, i32:$src1))]
+>;
+
+} // End AddedComplexity = 1
+
def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
@@ -1160,7 +1242,7 @@ def LOAD_CONST : AMDGPUShaderInst <
[(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
>;
-// SI Psuedo instructions. These are used by the CFG structurizer pass
+// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
@@ -1233,6 +1315,36 @@ def SI_KILL : InstSI <
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>;
+
+let UseNamedOperandTable = 1 in {
+
+def SI_RegisterLoad : AMDGPUShaderInst <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins FRAMEri64:$addr, i32imm:$chan),
+ "", []
+> {
+ let isRegisterLoad = 1;
+ let mayLoad = 1;
+}
+
+class SIRegStore<dag outs> : AMDGPUShaderInst <
+ outs,
+ (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan),
+ "", []
+> {
+ let isRegisterStore = 1;
+ let mayStore = 1;
+}
+
+let usesCustomInserter = 1 in {
+def SI_RegisterStorePseudo : SIRegStore<(outs)>;
+} // End usesCustomInserter = 1
+def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
+
+
+} // End UseNamedOperandTable = 1
+
def SI_INDIRECT_SRC : InstSI <
(outs VReg_32:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off),
@@ -1249,6 +1361,7 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
let Constraints = "$src = $dst";
}
+def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
@@ -1258,7 +1371,7 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
let usesCustomInserter = 1 in {
-// This psuedo instruction takes a pointer as input and outputs a resource
+// This pseudo instruction takes a pointer as input and outputs a resource
// constant that can be used with the ADDR64 MUBUF instructions.
def SI_ADDR64_RSRC : InstSI <
(outs SReg_128:$srsrc),
@@ -1289,7 +1402,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
- (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
+ (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
>;
@@ -1310,67 +1423,85 @@ def : Pat <
/********** Image sampling patterns **********/
/********** ======================= **********/
-/* int_SI_sample for simple 1D texture lookup */
+/* SIsample for simple 1D texture lookup */
def : Pat <
- (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
- (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
+ (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
+ (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
+class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT),
+class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT),
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY),
+class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleShadowPattern<Intrinsic name, MIMG opcode,
+class SampleShadowPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW),
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
+class SampleShadowArrayPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY),
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-/* int_SI_sample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns<ValueType addr_type> {
- def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
- def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
- def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
- def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
- def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
-
- def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
- def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
- def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
- def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
-
- def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
- def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
- def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
- def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
-
- def : SamplePattern <int_SI_sampled, IMAGE_SAMPLE_D, addr_type>;
- def : SampleArrayPattern <int_SI_sampled, IMAGE_SAMPLE_D, addr_type>;
- def : SampleShadowPattern <int_SI_sampled, IMAGE_SAMPLE_C_D, addr_type>;
- def : SampleShadowArrayPattern <int_SI_sampled, IMAGE_SAMPLE_C_D, addr_type>;
+/* SIsample* for texture lookups consuming more address parameters */
+multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
+                          MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
+                          MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
+ def : SamplePattern <SIsample, sample, addr_type>;
+ def : SampleRectPattern <SIsample, sample, addr_type>;
+ def : SampleArrayPattern <SIsample, sample, addr_type>;
+ def : SampleShadowPattern <SIsample, sample_c, addr_type>;
+ def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
+
+ def : SamplePattern <SIsamplel, sample_l, addr_type>;
+ def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
+ def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
+ def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
+
+ def : SamplePattern <SIsampleb, sample_b, addr_type>;
+ def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
+ def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
+ def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
+
+ def : SamplePattern <SIsampled, sample_d, addr_type>;
+ def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
+ def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
+ def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
}
-defm : SamplePatterns<v2i32>;
-defm : SamplePatterns<v4i32>;
-defm : SamplePatterns<v8i32>;
-defm : SamplePatterns<v16i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
+ IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
+ IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
+ IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
+ v2i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
+ IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
+ IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
+ IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
+ v4i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
+ IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
+ IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
+ IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
+ v8i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
+ IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
+ IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
+ IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
+ v16i32>;
/* int_SI_imageload for texture fetches consuming varying address parameters */
class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
@@ -1383,23 +1514,46 @@ class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> :
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;
-multiclass ImageLoadPatterns<ValueType addr_type> {
- def : ImageLoadPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>;
- def : ImageLoadArrayPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>;
+class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> {
+ def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>;
+ def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>;
+}
+
+multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> {
+ def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>;
+ def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>;
}
-defm : ImageLoadPatterns<v2i32>;
-defm : ImageLoadPatterns<v4i32>;
+defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>;
+defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>;
+
+defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>;
+defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>;
/* Image resource information */
def : Pat <
(int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
- (IMAGE_GET_RESINFO 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;
def : Pat <
(int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
- (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
+
+def : Pat <
+ (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;
/********** ============================================ **********/
@@ -1470,16 +1624,6 @@ foreach Index = 0-15 in {
>;
}
-def : Vector1_Build <v1i32, i32, VReg_32>;
-def : Vector2_Build <v2i32, i32>;
-def : Vector2_Build <v2f32, f32>;
-def : Vector4_Build <v4i32, i32>;
-def : Vector4_Build <v4f32, f32>;
-def : Vector8_Build <v8i32, i32>;
-def : Vector8_Build <v8f32, f32>;
-def : Vector16_Build <v16i32, i32>;
-def : Vector16_Build <v16f32, f32>;
-
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
@@ -1492,9 +1636,17 @@ def : BitConvert <f64, i64, VReg_64>;
def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
+def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
+def : BitConvert <v4i32, i128, VReg_128>;
+def : BitConvert <i128, v4i32, VReg_128>;
+
+def : BitConvert <v8i32, v32i8, SReg_256>;
+def : BitConvert <v32i8, v8i32, SReg_256>;
+def : BitConvert <v8i32, v32i8, VReg_256>;
+def : BitConvert <v32i8, v8i32, VReg_256>;
/********** =================== **********/
/********** Src & Dst modifiers **********/
@@ -1523,6 +1675,16 @@ def : Pat <
/********** ================== **********/
def : Pat <
+ (SGPRImm<(i32 imm)>:$imm),
+ (S_MOV_B32 imm:$imm)
+>;
+
+def : Pat <
+ (SGPRImm<(f32 fpimm)>:$imm),
+ (S_MOV_B32 fpimm:$imm)
+>;
+
+def : Pat <
(i32 imm:$imm),
(V_MOV_B32_e32 imm:$imm)
>;
@@ -1634,19 +1796,19 @@ def : Pat <
// 1. Offset as 8bit DWORD immediate
def : Pat <
- (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset),
+ (SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset)
>;
// 2. Offset loaded in an 32bit SGPR
def : Pat <
- (int_SI_load_const v16i8:$sbase, imm:$offset),
+ (SIload_constant i128:$sbase, imm:$offset),
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
// 3. Offset in an 32Bit VGPR
def : Pat <
- (int_SI_load_const v16i8:$sbase, i32:$voff),
+ (SIload_constant i128:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
>;
@@ -1677,17 +1839,36 @@ def : Pat <
/********** Load/Store Patterns **********/
/********** ======================= **********/
-def : Pat <
- (local_load i64:$src0),
- (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
- (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
+class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag i32:$src0),
+ (vt (inst 0, $src0, $src0, $src0, 0, 0))
>;
+def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
+def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
+def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
+def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
+def : DSReadPat <DS_READ_B32, i32, local_load>;
def : Pat <
- (local_store i32:$src1, i64:$src0),
- (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
+ (local_load i32:$src0),
+ (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
+>;
+
+class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag i32:$src1, i32:$src0),
+ (inst 0, $src0, $src1, $src1, 0, 0)
>;
+def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
+def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
+def : DSWritePat <DS_WRITE_B32, i32, local_store>;
+
+def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
+ (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
+
+def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
+ (DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>;
+
/********** ================== **********/
/********** SMRD Patterns **********/
/********** ================== **********/
@@ -1717,8 +1898,11 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
//===----------------------------------------------------------------------===//
// MUBUF Patterns
@@ -1766,23 +1950,46 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32,
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
global_load, constant_load>;
-multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt> {
+multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> {
def : Pat <
- (global_store vt:$value, i64:$ptr),
+ (st vt:$value, i64:$ptr),
(Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
>;
def : Pat <
- (global_store vt:$value, (add i64:$ptr, i64:$offset)),
+ (st vt:$value, (add i64:$ptr, i64:$offset)),
(Instr $value, (SI_ADDR64_RSRC $ptr), $offset, 0)
>;
}
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE, i32, truncstorei8_global>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT, i32, truncstorei16_global>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32, global_store>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
+
+//===----------------------------------------------------------------------===//
+// MTBUF Patterns
+//===----------------------------------------------------------------------===//
+
+// TBUFFER_STORE_FORMAT_*, addr64=0
+class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
+ (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
+ i32:$soffset, imm:$inst_offset, imm:$dfmt,
+ imm:$nfmt, imm:$offen, imm:$idxen,
+ imm:$glc, imm:$slc, imm:$tfe),
+ (opcode
+ $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
+ (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
+ (as_i1imm $slc), (as_i1imm $tfe), $soffset)
+>;
+
+def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
+def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
+def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
+def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
/********** ====================== **********/
/**********   Indirect addressing   **********/
@@ -1834,6 +2041,37 @@ def : Pat<
(V_CMP_U_F32_e64 $src0, $src1)
>;
+//===----------------------------------------------------------------------===//
+// Miscellaneous Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (i64 (trunc i128:$x)),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 (EXTRACT_SUBREG $x, sub0)), sub0),
+ (i32 (EXTRACT_SUBREG $x, sub1)), sub1)
+>;
+
+def : Pat <
+ (i32 (trunc i64:$a)),
+ (EXTRACT_SUBREG $a, sub0)
+>;
+
+// V_ADD_I32_e32/S_ADD_I32 produce carry in VCC/SCC. For the vector
+// case, the sgpr-copies pass will fix this to use the vector version.
+def : Pat <
+ (i32 (addc i32:$src0, i32:$src1)),
+ (S_ADD_I32 $src0, $src1)
+>;
+
+def : Pat <
+ (or i64:$a, i64:$b),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (V_OR_B32_e32 (EXTRACT_SUBREG $a, sub0), (EXTRACT_SUBREG $b, sub0)), sub0),
+ (V_OR_B32_e32 (EXTRACT_SUBREG $a, sub1), (EXTRACT_SUBREG $b, sub1)), sub1)
+>;
+
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 2fa073e..7fcc964 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -17,10 +17,28 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
- class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
+ def int_SI_tbuffer_store : Intrinsic <
+ [],
+ [llvm_anyint_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
+ llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
+ llvm_i32_ty, // vaddr(VGPR)
+ llvm_i32_ty, // soffset(SGPR)
+ llvm_i32_ty, // inst_offset(imm)
+ llvm_i32_ty, // dfmt(imm)
+ llvm_i32_ty, // nfmt(imm)
+ llvm_i32_ty, // offen(imm)
+ llvm_i32_ty, // idxen(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty], // tfe(imm)
+ []>;
+
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index c2e8f02..958763d 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -377,10 +377,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
unsigned Off = MI.getOperand(4).getImm();
+ unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = Vec;
- MachineInstr *MovRel =
+ MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(SubReg + Off)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
@@ -395,10 +398,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
+ unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = Dst;
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
- .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(SubReg + Off, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
@@ -409,6 +415,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
bool NeedM0 = false;
@@ -476,6 +483,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
IndirectSrc(MI);
break;
+ case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:
@@ -487,6 +495,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
NeedWQM = true;
// Fall through
case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_ADD_U32_RTN:
NeedM0 = true;
break;
@@ -508,7 +517,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::M0).addImm(0xffffffff);
}
- if (NeedWQM) {
+ if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index ee0e307..071f9fa 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -13,6 +13,10 @@
using namespace llvm;
+
+// Pin the vtable to this file.
+void SIMachineFunctionInfo::anchor() {}
+
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
PSInputAddr(0) { }
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 6da9f7f..2f1961c 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -22,6 +22,7 @@ namespace llvm {
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
+ virtual void anchor();
public:
SIMachineFunctionInfo(const MachineFunction &MF);
unsigned PSInputAddr;
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 50fd4c7..ed0bbaf 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "SIRegisterInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIInstrInfo.h"
using namespace llvm;
@@ -25,6 +26,10 @@ SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm)
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ Reserved.set(AMDGPU::EXEC);
+ Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo());
+ TII->reserveIndirectRegisters(Reserved, MF);
return Reserved;
}
@@ -50,6 +55,10 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
}
}
+unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
+ return getEncodingValue(Reg);
+}
+
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
@@ -70,3 +79,53 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
}
return NULL;
}
+
+bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const {
+ if (!RC) {
+ return false;
+ }
+ return !hasVGPRs(RC);
+}
+
+bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
+ return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+}
+
+const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
+ const TargetRegisterClass *SRC) const {
+ if (hasVGPRs(SRC)) {
+ return SRC;
+ } else if (SRC == &AMDGPU::SCCRegRegClass) {
+ return &AMDGPU::VCCRegRegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
+ return &AMDGPU::VReg_32RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
+ return &AMDGPU::VReg_64RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
+ return &AMDGPU::VReg_128RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
+ return &AMDGPU::VReg_256RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
+ return &AMDGPU::VReg_512RegClass;
+ }
+ return NULL;
+}
+
+const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
+ const TargetRegisterClass *RC, unsigned SubIdx) const {
+ if (SubIdx == AMDGPU::NoSubRegister)
+ return RC;
+
+ // If this register has a sub-register, we can safely assume it is a 32-bit
+  // register, because all of SI's sub-registers are 32-bit.
+ if (isSGPRClass(RC)) {
+ return &AMDGPU::SGPR_32RegClass;
+ } else {
+ return &AMDGPU::VGPR_32RegClass;
+ }
+}
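The new isSGPRClass and getEquivalentVGPRClass helpers give later passes a uniform way to move a value from scalar to vector registers. A minimal usage sketch follows, under the assumption that the caller has the function's MachineRegisterInfo at hand; the helper below is illustrative, not part of this patch.

#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Illustrative only: create a VGPR-class twin for a virtual register that is
// currently constrained to SGPRs, leaving VGPR-class registers untouched.
static unsigned makeVGPRTwin(llvm::MachineRegisterInfo &MRI,
                             const llvm::SIRegisterInfo &TRI, unsigned Reg) {
  const llvm::TargetRegisterClass *RC = MRI.getRegClass(Reg);
  if (!TRI.isSGPRClass(RC))
    return Reg;                       // Already lives in vector registers.
  const llvm::TargetRegisterClass *VRC = TRI.getEquivalentVGPRClass(RC);
  return VRC ? MRI.createVirtualRegister(VRC) : Reg;
}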
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index d0df4f9..8148f7f 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -42,9 +42,27 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+ virtual unsigned getHWRegIndex(unsigned Reg) const;
+
/// \brief Return the 'base' register class for this register.
/// e.g. SGPR0 => SReg_32, VGPR0 => VReg_32, SGPR0_SGPR1 => SReg_64, etc.
const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
+
+ /// \returns true if this class contains only SGPR registers
+ bool isSGPRClass(const TargetRegisterClass *RC) const;
+
+ /// \returns true if this class contains VGPR registers.
+ bool hasVGPRs(const TargetRegisterClass *RC) const;
+
+ /// \returns A VGPR reg class with the same width as \p SRC
+ const TargetRegisterClass *getEquivalentVGPRClass(
+ const TargetRegisterClass *SRC) const;
+
+ /// \returns The register class that is used for a sub-register of \p RC for
+ /// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will
+ /// be returned.
+ const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
+ unsigned SubIdx) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 292b9d2..49bdbc9 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -43,7 +43,7 @@ def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
-def SGPR_64 : RegisterTuples<[sub0, sub1],
+def SGPR_64Regs : RegisterTuples<[sub0, sub1],
[(add (decimate (trunc SGPR_32, 101), 2)),
(add (decimate (shl SGPR_32, 1), 2))]>;
@@ -153,15 +153,17 @@ def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add SGPR_32, M0Reg)
>;
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
+
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
- (add SGPR_64, VCCReg, EXECReg)
+ (add SGPR_64Regs, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>;
-def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interpolation Registers)
def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
@@ -172,9 +174,9 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
let Size = 96;
}
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>;
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
+def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
new file mode 100644
index 0000000..f194d8b
--- /dev/null
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -0,0 +1,162 @@
+//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass performs the following type substitution on all
+/// non-compute shaders:
+///
+/// v16i8 => i128
+/// - v16i8 is used for constant memory resource descriptors. This type is
+/// legal for some compute APIs, and we don't want to declare it as legal
+/// in the backend, because we want the legalizer to expand all v16i8
+/// operations.
+/// v1* => *
+/// - Having v1* types complicates the legalizer and we can easily replace
+///   them with the element type.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InstVisitor.h"
+
+using namespace llvm;
+
+namespace {
+
+class SITypeRewriter : public FunctionPass,
+ public InstVisitor<SITypeRewriter> {
+
+ static char ID;
+ Module *Mod;
+ Type *v16i8;
+ Type *i128;
+
+public:
+ SITypeRewriter() : FunctionPass(ID) { }
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual const char *getPassName() const {
+ return "SI Type Rewriter";
+ }
+ void visitLoadInst(LoadInst &I);
+ void visitCallInst(CallInst &I);
+ void visitBitCast(BitCastInst &I);
+};
+
+} // End anonymous namespace
+
+char SITypeRewriter::ID = 0;
+
+bool SITypeRewriter::doInitialization(Module &M) {
+ Mod = &M;
+ v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
+ i128 = Type::getIntNTy(M.getContext(), 128);
+ return false;
+}
+
+bool SITypeRewriter::runOnFunction(Function &F) {
+ AttributeSet Set = F.getAttributes();
+ Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, "ShaderType");
+
+ unsigned ShaderType = ShaderType::COMPUTE;
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ Str.getAsInteger(0, ShaderType);
+ }
+  if (ShaderType != ShaderType::COMPUTE) {
+    visit(F);
+  }
+
+ return false;
+}
+
+void SITypeRewriter::visitLoadInst(LoadInst &I) {
+ Value *Ptr = I.getPointerOperand();
+ Type *PtrTy = Ptr->getType();
+ Type *ElemTy = PtrTy->getPointerElementType();
+ IRBuilder<> Builder(&I);
+ if (ElemTy == v16i8) {
+ Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2));
+ LoadInst *Load = Builder.CreateLoad(BitCast);
+ SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
+ I.getAllMetadataOtherThanDebugLoc(MD);
+ for (unsigned i = 0, e = MD.size(); i != e; ++i) {
+ Load->setMetadata(MD[i].first, MD[i].second);
+ }
+ Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
+ I.replaceAllUsesWith(BitCastLoad);
+ I.eraseFromParent();
+ }
+}
+
+void SITypeRewriter::visitCallInst(CallInst &I) {
+ IRBuilder<> Builder(&I);
+ SmallVector <Value*, 8> Args;
+ SmallVector <Type*, 8> Types;
+ bool NeedToReplace = false;
+ Function *F = I.getCalledFunction();
+ std::string Name = F->getName().str();
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Value *Arg = I.getArgOperand(i);
+ if (Arg->getType() == v16i8) {
+ Args.push_back(Builder.CreateBitCast(Arg, i128));
+ Types.push_back(i128);
+ NeedToReplace = true;
+ Name = Name + ".i128";
+ } else if (Arg->getType()->isVectorTy() &&
+ Arg->getType()->getVectorNumElements() == 1 &&
+ Arg->getType()->getVectorElementType() ==
+ Type::getInt32Ty(I.getContext())){
+ Type *ElementTy = Arg->getType()->getVectorElementType();
+ std::string TypeName = "i32";
+ InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg);
+ assert(Def);
+ Args.push_back(Def->getOperand(1));
+ Types.push_back(ElementTy);
+ std::string VecTypeName = "v1" + TypeName;
+ Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
+ NeedToReplace = true;
+ } else {
+ Args.push_back(Arg);
+ Types.push_back(Arg->getType());
+ }
+ }
+
+ if (!NeedToReplace) {
+ return;
+ }
+ Function *NewF = Mod->getFunction(Name);
+ if (!NewF) {
+ NewF = Function::Create(FunctionType::get(F->getReturnType(), Types, false), GlobalValue::ExternalLinkage, Name, Mod);
+ NewF->setAttributes(F->getAttributes());
+ }
+ I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
+ I.eraseFromParent();
+}
+
+void SITypeRewriter::visitBitCast(BitCastInst &I) {
+ IRBuilder<> Builder(&I);
+ if (I.getDestTy() != i128) {
+ return;
+ }
+
+ if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
+ if (Op->getSrcTy() == i128) {
+ I.replaceAllUsesWith(Op->getOperand(0));
+ I.eraseFromParent();
+ }
+ }
+}
+
+FunctionPass *llvm::createSITypeRewriter() {
+ return new SITypeRewriter();
+}
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index acf7496..6339394 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td)
tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM SparcGenCodeEmitter.inc -gen-emitter)
tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM SparcGenSubtargetInfo.inc -gen-subtarget)
@@ -20,6 +21,8 @@ add_llvm_target(SparcCodeGen
SparcSubtarget.cpp
SparcTargetMachine.cpp
SparcSelectionDAGInfo.cpp
+ SparcJITInfo.cpp
+ SparcCodeEmitter.cpp
)
add_dependencies(LLVMSparcCodeGen SparcCommonTableGen intrinsics_gen)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index b101751..9a0466a 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "delay-slot-filler"
#include "Sparc.h"
+#include "SparcSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -39,10 +40,13 @@ namespace {
/// layout, etc.
///
TargetMachine &TM;
+ const SparcSubtarget *Subtarget;
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) { }
+ : MachineFunctionPass(ID), TM(tm),
+ Subtarget(&TM.getSubtarget<SparcSubtarget>()) {
+ }
virtual const char *getPassName() const {
return "SPARC Delay Slot Filler";
@@ -102,6 +106,8 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
MachineBasicBlock::iterator MI = I;
++I;
@@ -114,6 +120,14 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
continue;
}
+ if (!Subtarget->isV9() &&
+ (MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD
+ || MI->getOpcode() == SP::FCMPQ)) {
+ BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP));
+ Changed = true;
+ continue;
+ }
+
// If MI has no delay slot, skip.
if (!MI->hasDelaySlot())
continue;
@@ -126,7 +140,6 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
++FilledSlots;
Changed = true;
- const TargetInstrInfo *TII = TM.getInstrInfo();
if (D == MBB.end())
BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP));
else
@@ -156,7 +169,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (slot == MBB.begin())
return MBB.end();
- if (slot->getOpcode() == SP::RET)
+ if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL)
return MBB.end();
if (slot->getOpcode() == SP::RETL) {
@@ -342,6 +355,7 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
case SP::CALL: structSizeOpNum = 1; break;
case SP::JMPLrr:
case SP::JMPLri: structSizeOpNum = 2; break;
+ case SP::TLS_CALL: return false;
}
const MachineOperand &MO = I->getOperand(structSizeOpNum);
diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt
index 7d54d32..fd8e5d9 100644
--- a/lib/Target/Sparc/LLVMBuild.txt
+++ b/lib/Target/Sparc/LLVMBuild.txt
@@ -23,6 +23,7 @@ type = TargetGroup
name = Sparc
parent = Target
has_asmprinter = 1
+has_jit = 1
[component_1]
type = Library
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
index aac0e8d..f3caeaa 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
@@ -53,7 +53,27 @@ enum TOF {
// Extract bits 41-32 of an address.
// Assembler: %hm(addr)
- MO_HM
+ MO_HM,
+
+ // TargetFlags for Thread Local Storage.
+ MO_TLS_GD_HI22,
+ MO_TLS_GD_LO10,
+ MO_TLS_GD_ADD,
+ MO_TLS_GD_CALL,
+ MO_TLS_LDM_HI22,
+ MO_TLS_LDM_LO10,
+ MO_TLS_LDM_ADD,
+ MO_TLS_LDM_CALL,
+ MO_TLS_LDO_HIX22,
+ MO_TLS_LDO_LOX10,
+ MO_TLS_LDO_ADD,
+ MO_TLS_IE_HI22,
+ MO_TLS_IE_LO10,
+ MO_TLS_IE_LD,
+ MO_TLS_IE_LDX,
+ MO_TLS_IE_ADD,
+ MO_TLS_LE_HIX22,
+ MO_TLS_LE_LOX10
};
} // end namespace SPII
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 5a52abe..baac36b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -21,23 +21,26 @@ void SparcELFMCAsmInfo::anchor() { }
SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
IsLittleEndian = false;
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::sparcv9) {
+ bool isV9 = (TheTriple.getArch() == Triple::sparcv9);
+
+ if (isV9) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
- Data64bitsDirective = 0; // .xword is only supported by V9.
+ // .xword is only supported by V9.
+ Data64bitsDirective = (isV9) ? "\t.xword\t" : 0;
ZeroDirective = "\t.skip\t";
CommentString = "!";
HasLEB128 = true;
SupportsDebugInformation = true;
-
+
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
- WeakRefDirective = "\t.weak\t";
-
PrivateGlobalPrefix = ".L";
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 621e8ff..1e58e37 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -14,12 +14,12 @@
#ifndef SPARCTARGETASMINFO_H
#define SPARCTARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
- class SparcELFMCAsmInfo : public MCAsmInfo {
+ class SparcELFMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit SparcELFMCAsmInfo(StringRef TT);
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
index 4b81ada..c171db7 100644
--- a/lib/Target/Sparc/Makefile
+++ b/lib/Target/Sparc/Makefile
@@ -14,7 +14,8 @@ TARGET = Sparc
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \
SparcGenAsmWriter.inc SparcGenDAGISel.inc \
- SparcGenSubtargetInfo.inc SparcGenCallingConv.inc
+ SparcGenSubtargetInfo.inc SparcGenCallingConv.inc \
+ SparcGenCodeEmitter.inc
DIRS = TargetInfo MCTargetDesc
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 98563db..f44b604 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -26,6 +26,8 @@ namespace llvm {
FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcJITCodeEmitterPass(SparcTargetMachine &TM,
+ JITCodeEmitter &JCE);
} // end namespace llvm;
@@ -103,5 +105,22 @@ namespace llvm {
}
llvm_unreachable("Invalid cond code");
}
+
+ inline static unsigned HI22(int64_t imm) {
+ return (unsigned)((imm >> 10) & ((1 << 22)-1));
+ }
+
+ inline static unsigned LO10(int64_t imm) {
+ return (unsigned)(imm & 0x3FF);
+ }
+
+ inline static unsigned HIX22(int64_t imm) {
+ return HI22(~imm);
+ }
+
+ inline static unsigned LOX10(int64_t imm) {
+ return ~LO10(~imm);
+ }
+
} // end namespace llvm
#endif
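The four helpers above encode the usual SPARC immediate split: HI22/LO10 feed the sethi+or sequence, and HIX22/LOX10 are their ones'-complement counterparts for the sethi+xor sequence used on negative values (see the frame lowering changes below). A quick sanity check, written as a sketch that assumes it lives in a backend file where Sparc.h is reachable:

    #include "Sparc.h"
    #include <cassert>
    #include <cstdint>

    // Sketch only: sethi %hi(Imm), %g1 ; or %g1, %lo(Imm), %g1 reassembles Imm.
    static void checkHiLoSplit() {
      int64_t Imm = 0x12345678;
      assert(((llvm::HI22(Imm) << 10) | llvm::LO10(Imm)) == (uint32_t)Imm);
    }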
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index d42c40f..0df48f6 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -30,6 +30,10 @@ def FeatureVIS
: SubtargetFeature<"vis", "IsVIS", "true",
"Enable UltraSPARC Visual Instruction Set extensions">;
+def FeatureHardQuad
+ : SubtargetFeature<"hard-quad-float", "HasHardQuad", "true",
+ "Enable quad-word floating point instructions">;
+
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 3fe2b44..d06c894 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -43,6 +44,7 @@ namespace {
const char *Modifier = 0);
void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ virtual void EmitFunctionBodyStart();
virtual void EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -63,11 +65,35 @@ namespace {
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
const;
+ void EmitGlobalRegisterDecl(unsigned reg) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "\t.register "
+ << "%" << StringRef(getRegisterName(reg)).lower()
+ << ", "
+ << ((reg == SP::G6 || reg == SP::G7)? "#ignore" : "#scratch");
+ OutStreamer.EmitRawText(OS.str());
+ }
+
};
} // end of anonymous namespace
#include "SparcGenAsmWriter.inc"
+void SparcAsmPrinter::EmitFunctionBodyStart() {
+ if (!TM.getSubtarget<SparcSubtarget>().is64Bit())
+ return;
+
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const unsigned globalRegs[] = { SP::G2, SP::G3, SP::G6, SP::G7, 0 };
+ for (unsigned i = 0; globalRegs[i] != 0; ++i) {
+ unsigned reg = globalRegs[i];
+ if (MRI.use_empty(reg))
+ continue;
+ EmitGlobalRegisterDecl(reg);
+ }
+}
+
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand (opNum);
@@ -79,11 +105,37 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
assert(TF == SPII::MO_NO_FLAG &&
"Cannot handle target flags on call address");
else if (MI->getOpcode() == SP::SETHIi)
- assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) &&
+ assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH
+ || TF == SPII::MO_TLS_GD_HI22
+ || TF == SPII::MO_TLS_LDM_HI22
+ || TF == SPII::MO_TLS_LDO_HIX22
+ || TF == SPII::MO_TLS_IE_HI22
+ || TF == SPII::MO_TLS_LE_HIX22) &&
"Invalid target flags for address operand on sethi");
+ else if (MI->getOpcode() == SP::TLS_CALL)
+ assert((TF == SPII::MO_NO_FLAG
+ || TF == SPII::MO_TLS_GD_CALL
+ || TF == SPII::MO_TLS_LDM_CALL) &&
+ "Cannot handle target flags on tls call address");
+ else if (MI->getOpcode() == SP::TLS_ADDrr)
+ assert((TF == SPII::MO_TLS_GD_ADD || TF == SPII::MO_TLS_LDM_ADD
+ || TF == SPII::MO_TLS_LDO_ADD || TF == SPII::MO_TLS_IE_ADD) &&
+ "Cannot handle target flags on add for TLS");
+ else if (MI->getOpcode() == SP::TLS_LDrr)
+ assert(TF == SPII::MO_TLS_IE_LD &&
+ "Cannot handle target flags on ld for TLS");
+ else if (MI->getOpcode() == SP::TLS_LDXrr)
+ assert(TF == SPII::MO_TLS_IE_LDX &&
+ "Cannot handle target flags on ldx for TLS");
+ else if (MI->getOpcode() == SP::XORri)
+ assert((TF == SPII::MO_TLS_LDO_LOX10 || TF == SPII::MO_TLS_LE_LOX10) &&
+ "Cannot handle target flags on xor for TLS");
else
- assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 ||
- TF == SPII::MO_HM) &&
+ assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44
+ || TF == SPII::MO_HM
+ || TF == SPII::MO_TLS_GD_LO10
+ || TF == SPII::MO_TLS_LDM_LO10
+ || TF == SPII::MO_TLS_IE_LO10 ) &&
"Invalid target flags for small address operand");
}
#endif
@@ -102,6 +154,24 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case SPII::MO_L44: O << "%l44("; break;
case SPII::MO_HH: O << "%hh("; break;
case SPII::MO_HM: O << "%hm("; break;
+ case SPII::MO_TLS_GD_HI22: O << "%tgd_hi22("; break;
+ case SPII::MO_TLS_GD_LO10: O << "%tgd_lo10("; break;
+ case SPII::MO_TLS_GD_ADD: O << "%tgd_add("; break;
+ case SPII::MO_TLS_GD_CALL: O << "%tgd_call("; break;
+ case SPII::MO_TLS_LDM_HI22: O << "%tldm_hi22("; break;
+ case SPII::MO_TLS_LDM_LO10: O << "%tldm_lo10("; break;
+ case SPII::MO_TLS_LDM_ADD: O << "%tldm_add("; break;
+ case SPII::MO_TLS_LDM_CALL: O << "%tldm_call("; break;
+ case SPII::MO_TLS_LDO_HIX22: O << "%tldo_hix22("; break;
+ case SPII::MO_TLS_LDO_LOX10: O << "%tldo_lox10("; break;
+ case SPII::MO_TLS_LDO_ADD: O << "%tldo_add("; break;
+ case SPII::MO_TLS_IE_HI22: O << "%tie_hi22("; break;
+ case SPII::MO_TLS_IE_LO10: O << "%tie_lo10("; break;
+ case SPII::MO_TLS_IE_LD: O << "%tie_ld("; break;
+ case SPII::MO_TLS_IE_LDX: O << "%tie_ldx("; break;
+ case SPII::MO_TLS_IE_ADD: O << "%tie_add("; break;
+ case SPII::MO_TLS_LE_HIX22: O << "%tle_hix22("; break;
+ case SPII::MO_TLS_LE_LOX10: O << "%tle_lox10("; break;
}
switch (MO.getType()) {
@@ -116,7 +186,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << *MO.getMBB()->getSymbol();
return;
case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
break;
case MachineOperand::MO_BlockAddress:
O << GetBlockAddressSymbol(MO.getBlockAddress())->getName();
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index a181bcf..acd4ec2 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -117,3 +117,14 @@ def CC_Sparc64 : CallingConv<[
// arguments whether they are passed in registers or not.
CCCustom<"CC_Sparc64_Full">
]>;
+
+// Callee-saved registers are handled by the register window mechanism.
+def CSR : CalleeSavedRegs<(add)> {
+ let OtherPreserved = (add (sequence "I%u", 0, 7),
+ (sequence "L%u", 0, 7));
+}
+
+// Callee-saved registers for calls with ReturnsTwice attribute.
+def RTCSR : CalleeSavedRegs<(add)> {
+ let OtherPreserved = (add I6, I7);
+}
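Neither definition lists registers to spill (the register window already preserves the in and local registers); what TableGen generates from them is a pair of call-preserved register masks. How the companion SparcRegisterInfo change exposes those masks is not shown in this hunk; the sketch below is an assumption based on the usual pattern, with CSR_RegMask and RTCSR_RegMask being the names TableGen derives from the defs above:

    // Assumed wiring in SparcRegisterInfo.cpp (illustrative only):
    const uint32_t *
    SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
      return CSR_RegMask;      // from "def CSR" above
    }

    const uint32_t *
    SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const {
      return RTCSR_RegMask;    // from "def RTCSR", for returns_twice callees
    }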
diff --git a/lib/Target/Sparc/SparcCodeEmitter.cpp b/lib/Target/Sparc/SparcCodeEmitter.cpp
new file mode 100644
index 0000000..9bfe31f
--- /dev/null
+++ b/lib/Target/Sparc/SparcCodeEmitter.cpp
@@ -0,0 +1,245 @@
+//===-- Sparc/SparcCodeEmitter.cpp - Convert Sparc Code to Machine Code ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Sparc machine instructions
+// into relocatable machine code.
+//
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "Sparc.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
+#include "SparcRelocations.h"
+#include "SparcTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+class SparcCodeEmitter : public MachineFunctionPass {
+ SparcJITInfo *JTI;
+ const SparcInstrInfo *II;
+ const DataLayout *TD;
+ const SparcSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ bool IsPIC;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo> ();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+public:
+ SparcCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0), II(0), TD(0),
+ TM(tm), MCE(mce), MCPEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Sparc Machine Code Emitter";
+ }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ void emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB);
+
+private:
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ void emitWord(unsigned Word);
+
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+};
+} // end anonymous namespace.
+
+char SparcCodeEmitter::ID = 0;
+
+bool SparcCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ SparcTargetMachine &Target = static_cast<SparcTargetMachine &>(
+ const_cast<TargetMachine &>(MF.getTarget()));
+
+ JTI = Target.getJITInfo();
+ II = Target.getInstrInfo();
+ TD = Target.getDataLayout();
+ Subtarget = &TM.getSubtarget<SparcSubtarget> ();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ JTI->Initialize(MF, IsPIC);
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getName() << "'\n");
+ MCE.startFunction(MF);
+
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB){
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+ E = MBB->instr_end(); I != E;)
+ emitInstruction(*I++, *MBB);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void SparcCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
+
+ MCE.processDebugLoc(MI->getDebugLoc(), true);
+
+ ++NumEmitted;
+
+ switch (MI->getOpcode()) {
+ default: {
+ emitWord(getBinaryCodeForInstr(*MI));
+ break;
+ }
+ case TargetOpcode::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI->getOperand(0).getSymbolName()[0]) {
+ report_fatal_error("JIT does not support inline asm!");
+ }
+ break;
+ }
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL: {
+ MCE.emitLabel(MI->getOperand(0).getMCSymbol());
+ break;
+ }
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL: {
+ // Do nothing.
+ break;
+ }
+ case SP::GETPCX: {
+ report_fatal_error("JIT does not support pseudo instruction GETPCX yet!");
+ break;
+ }
+ }
+
+ MCE.processDebugLoc(MI->getDebugLoc(), false);
+}
+
+void SparcCodeEmitter::emitWord(unsigned Word) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Word) << "\n");
+ MCE.emitWordBE(Word);
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned SparcCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO));
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+unsigned SparcCodeEmitter::getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+
+ unsigned TF = MO.getTargetFlags();
+ switch (TF) {
+ default:
+ case SPII::MO_NO_FLAG: break;
+ case SPII::MO_LO: return SP::reloc_sparc_lo;
+ case SPII::MO_HI: return SP::reloc_sparc_hi;
+ case SPII::MO_H44:
+ case SPII::MO_M44:
+ case SPII::MO_L44:
+ case SPII::MO_HH:
+ case SPII::MO_HM: assert(0 && "FIXME: Implement Medium/Large code model.");
+ }
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default: break;
+ case SP::CALL: return SP::reloc_sparc_pc30;
+ case SP::BA:
+ case SP::BCOND:
+ case SP::FBCOND: return SP::reloc_sparc_pc22;
+ case SP::BPXCC: return SP::reloc_sparc_pc19;
+ }
+ llvm_unreachable("unknown reloc!");
+}
+
+void SparcCodeEmitter::emitGlobalAddress(const GlobalValue *GV,
+ unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0,
+ true));
+}
+
+void SparcCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, 0, 0));
+}
+
+void SparcCodeEmitter::
+emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, false));
+}
+
+void SparcCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB));
+}
+
+
+/// createSparcJITCodeEmitterPass - Return a pass that emits the collected Sparc
+/// code to the specified MCE object.
+FunctionPass *llvm::createSparcJITCodeEmitterPass(SparcTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new SparcCodeEmitter(TM, JCE);
+}
+
+#include "SparcGenCodeEmitter.inc"
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 536e466..c75998a 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -33,6 +33,51 @@ DisableLeafProc("disable-sparc-leaf-proc",
cl::Hidden);
+void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int NumBytes,
+ unsigned ADDrr,
+ unsigned ADDri) const {
+
+ DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ if (NumBytes >= -4096 && NumBytes < 4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ return;
+ }
+
+  // Emit this the hard way. This clobbers G1, which we always know is
+  // available here.
+ if (NumBytes >= 0) {
+ // Emit nonnegative numbers with sethi + or.
+ // sethi %hi(NumBytes), %g1
+ // or %g1, %lo(NumBytes), %g1
+ // add %sp, %g1, %sp
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
+ .addImm(HI22(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(LO10(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ return ;
+ }
+
+ // Emit negative numbers with sethi + xor.
+ // sethi %hix(NumBytes), %g1
+ // xor %g1, %lox(NumBytes), %g1
+ // add %sp, %g1, %sp
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
+ .addImm(HIX22(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::XORri), SP::G1)
+ .addReg(SP::G1).addImm(LOX10(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+}
+
void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
@@ -55,21 +100,27 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SAVErr = SP::ADDrr;
}
NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);
-
- if (NumBytes >= -4096) {
- BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6)
- .addReg(SP::O6).addImm(NumBytes);
- } else {
- // Emit this the hard way. This clobbers G1 which we always know is
- // available here.
- unsigned OffHi = (unsigned)NumBytes >> 10U;
- BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
- // Emit G1 = G1 + I6
- BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
- .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
- BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6)
- .addReg(SP::O6).addReg(SP::G1);
- }
+ emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SP::PROLOG_LABEL)).addSym(FrameLabel);
+
+ unsigned regFP = MRI->getDwarfRegNum(SP::I6, true);
+
+ // Emit ".cfi_def_cfa_register 30".
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(FrameLabel,
+ regFP));
+ // Emit ".cfi_window_save".
+ MMI.addFrameInst(MCCFIInstruction::createWindowSave(FrameLabel));
+
+ unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true);
+ unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true);
+ // Emit ".cfi_register 15, 31".
+ MMI.addFrameInst(MCCFIInstruction::createRegister(FrameLabel,
+ regOutRA,
+ regInRA));
}
void SparcFrameLowering::
@@ -77,15 +128,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
if (!hasReservedCallFrame(MF)) {
MachineInstr &MI = *I;
- DebugLoc DL = MI.getDebugLoc();
int Size = MI.getOperand(0).getImm();
if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
Size = -Size;
- const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+
if (Size)
- BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6)
- .addImm(Size);
+ emitSPAdjustment(MF, MBB, I, Size, SP::ADDrr, SP::ADDri);
}
MBB.erase(I);
}
@@ -112,21 +160,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
return;
NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
-
- if (NumBytes < 4096) {
- BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
- .addReg(SP::O6).addImm(NumBytes);
- } else {
- // Emit this the hard way. This clobbers G1 which we always know is
- // available here.
- unsigned OffHi = (unsigned)NumBytes >> 10U;
- BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
- // Emit G1 = G1 + I6
- BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
- .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
- BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6)
- .addReg(SP::O6).addReg(SP::G1);
- }
+ emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
}
bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
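The comments inside emitSPAdjustment spell out the two slow-path sequences: sethi+or for non-negative adjustments and sethi+xor for negative ones. As a worked example, assume an adjustment of -8200 bytes, which misses the [-4096, 4096) fast path; the Sparc.h helpers then produce immediates that xor back to the full 32-bit value:

    #include "Sparc.h"
    #include <cstdint>
    #include <cstdio>

    // Sketch: the -8200 value is only an example, not taken from the patch.
    int main() {
      int64_t NumBytes = -8200;
      unsigned Hi = llvm::HIX22(NumBytes);   // sethi %hix(-8200), %g1
      unsigned Lo = llvm::LOX10(NumBytes);   // xor   %g1, %lox(-8200), %g1
      std::printf("0x%x ^ 0x%x = 0x%x\n",    // then add/save %sp, %g1, %sp
                  Hi << 10, Lo, (Hi << 10) ^ Lo);  // 0x2000 ^ 0xfffffff8 = 0xffffdff8
      return 0;
    }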
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 8eaef59..072fde3 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -49,6 +49,14 @@ private:
// Returns true if MF is a leaf procedure.
bool isLeafProc(MachineFunction &MF) const;
+
+
+ // Emits code for adjusting SP in function prologue/epilogue.
+ void emitSPAdjustment(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int NumBytes, unsigned ADDrr, unsigned ADDri) const;
+
};
} // End llvm namespace
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index db62151..b012bfd 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -80,7 +80,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
return true;
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
@@ -117,7 +118,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
if (Addr.getOpcode() == ISD::FrameIndex) return false;
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
@@ -139,8 +141,10 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
switch (N->getOpcode()) {
default: break;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 4b0fa67..64625f7 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -14,6 +14,7 @@
#include "SparcISelLowering.h"
#include "SparcMachineFunctionInfo.h"
+#include "SparcRegisterInfo.h"
#include "SparcTargetMachine.h"
#include "MCTargetDesc/SparcBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -648,6 +649,27 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return LowerCall_32(CLI, InVals);
}
+static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
+ ImmutableCallSite *CS) {
+ if (CS)
+ return CS->hasFnAttr(Attribute::ReturnsTwice);
+
+ const Function *CalleeFn = 0;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ CalleeFn = dyn_cast<Function>(G->getGlobal());
+ } else if (ExternalSymbolSDNode *E =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const Function *Fn = DAG.getMachineFunction().getFunction();
+ const Module *M = Fn->getParent();
+ const char *CalleeName = E->getSymbol();
+ CalleeFn = M->getFunction(CalleeName);
+ }
+
+ if (!CalleeFn)
+ return false;
+ return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice);
+}
+
// Lower a call for the 32-bit ABI.
SDValue
SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
@@ -861,6 +883,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
+ bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
@@ -880,6 +903,16 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const SparcRegisterInfo *TRI =
+ ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
+ const uint32_t *Mask = ((hasReturnsTwice)
+ ? TRI->getRTCallPreservedMask(CallConv)
+ : TRI->getCallPreservedMask(CallConv));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -908,6 +941,23 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
return Chain;
}
+// This function returns true if CalleeName is an ABI function that returns
+// a long double (fp128).
+static bool isFP128ABICall(const char *CalleeName)
+{
+ static const char *const ABICalls[] =
+ { "_Q_add", "_Q_sub", "_Q_mul", "_Q_div",
+ "_Q_sqrt", "_Q_neg",
+ "_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq",
+ "_Q_lltoq", "_Q_ulltoq",
+ 0
+ };
+ for (const char * const *I = ABICalls; *I != 0; ++I)
+ if (strcmp(CalleeName, *I) == 0)
+ return true;
+ return false;
+}
+
unsigned
SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
{
@@ -918,7 +968,10 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
dyn_cast<ExternalSymbolSDNode>(Callee)) {
const Function *Fn = DAG.getMachineFunction().getFunction();
const Module *M = Fn->getParent();
- CalleeFn = M->getFunction(E->getSymbol());
+ const char *CalleeName = E->getSymbol();
+ CalleeFn = M->getFunction(CalleeName);
+ if (!CalleeFn && isFP128ABICall(CalleeName))
+ return 16; // Return sizeof(fp128)
}
if (!CalleeFn)
@@ -983,6 +1036,9 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SDLoc DL = CLI.DL;
SDValue Chain = CLI.Chain;
+ // Sparc target does not yet support tail call optimization.
+ CLI.IsTailCall = false;
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
@@ -1099,6 +1155,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
SDValue Callee = CLI.Callee;
+ bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
@@ -1112,6 +1169,15 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const SparcRegisterInfo *TRI =
+ ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
+ const uint32_t *Mask = ((hasReturnsTwice)
+ ? TRI->getRTCallPreservedMask(CLI.CallConv)
+ : TRI->getCallPreservedMask(CLI.CallConv));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
// Make sure the CopyToReg nodes are glued to the call instruction which
// consumes the registers.
if (InGlue.getNode())
@@ -1244,15 +1310,21 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
+ addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
// Turn FP extload into load/fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
// Sparc doesn't have i1 sign extending load
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
// Custom legalize GlobalAddress nodes into LO/HI parts.
setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom);
@@ -1271,13 +1343,25 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ // ... nor does SparcV9.
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ }
+
// Custom expand fp<->sint
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- // Expand fp<->uint
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ // Custom Expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -1286,9 +1370,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::f128, Expand);
// Sparc doesn't have BRCOND either, it has BR_CC.
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
@@ -1297,18 +1384,34 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ADDC, MVT::i64, Custom);
+ setOperationAction(ISD::ADDE, MVT::i64, Custom);
+ setOperationAction(ISD::SUBC, MVT::i64, Custom);
+ setOperationAction(ISD::SUBE, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
}
// FIXME: There are instructions available for ATOMIC_FENCE
@@ -1321,6 +1424,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FABS, MVT::f64, Custom);
}
+ setOperationAction(ISD::FSIN , MVT::f128, Expand);
+ setOperationAction(ISD::FCOS , MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FREM , MVT::f128, Expand);
+ setOperationAction(ISD::FMA , MVT::f128, Expand);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
@@ -1339,8 +1447,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f128, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
@@ -1352,7 +1462,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ }
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -1366,14 +1481,100 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
- // No debug info support yet.
- setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ setExceptionPointerRegister(SP::I0);
+ setExceptionSelectorRegister(SP::I1);
setStackPointerRegisterToSaveRestore(SP::O6);
if (Subtarget->isV9())
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+ if (Subtarget->isV9() && Subtarget->hasHardQuad()) {
+ setOperationAction(ISD::LOAD, MVT::f128, Legal);
+ setOperationAction(ISD::STORE, MVT::f128, Legal);
+ } else {
+ setOperationAction(ISD::LOAD, MVT::f128, Custom);
+ setOperationAction(ISD::STORE, MVT::f128, Custom);
+ }
+
+ if (Subtarget->hasHardQuad()) {
+ setOperationAction(ISD::FADD, MVT::f128, Legal);
+ setOperationAction(ISD::FSUB, MVT::f128, Legal);
+ setOperationAction(ISD::FMUL, MVT::f128, Legal);
+ setOperationAction(ISD::FDIV, MVT::f128, Legal);
+ setOperationAction(ISD::FSQRT, MVT::f128, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
+ if (Subtarget->isV9()) {
+ setOperationAction(ISD::FNEG, MVT::f128, Legal);
+ setOperationAction(ISD::FABS, MVT::f128, Legal);
+ } else {
+ setOperationAction(ISD::FNEG, MVT::f128, Custom);
+ setOperationAction(ISD::FABS, MVT::f128, Custom);
+ }
+
+ if (!Subtarget->is64Bit()) {
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Q_qtoll");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Q_qtoull");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Q_lltoq");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Q_ulltoq");
+ }
+
+ } else {
+ // Custom legalize f128 operations.
+
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FSQRT, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Custom);
+ setOperationAction(ISD::FABS, MVT::f128, Custom);
+
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+
+ // Setup Runtime library names.
+ if (Subtarget->is64Bit()) {
+ setLibcallName(RTLIB::ADD_F128, "_Qp_add");
+ setLibcallName(RTLIB::SUB_F128, "_Qp_sub");
+ setLibcallName(RTLIB::MUL_F128, "_Qp_mul");
+ setLibcallName(RTLIB::DIV_F128, "_Qp_div");
+ setLibcallName(RTLIB::SQRT_F128, "_Qp_sqrt");
+ setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Qp_qtoi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I32, "_Qp_qtoui");
+ setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Qp_itoq");
+ setLibcallName(RTLIB::UINTTOFP_I32_F128, "_Qp_uitoq");
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Qp_qtox");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Qp_qtoux");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Qp_xtoq");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Qp_uxtoq");
+ setLibcallName(RTLIB::FPEXT_F32_F128, "_Qp_stoq");
+ setLibcallName(RTLIB::FPEXT_F64_F128, "_Qp_dtoq");
+ setLibcallName(RTLIB::FPROUND_F128_F32, "_Qp_qtos");
+ setLibcallName(RTLIB::FPROUND_F128_F64, "_Qp_qtod");
+ } else {
+ setLibcallName(RTLIB::ADD_F128, "_Q_add");
+ setLibcallName(RTLIB::SUB_F128, "_Q_sub");
+ setLibcallName(RTLIB::MUL_F128, "_Q_mul");
+ setLibcallName(RTLIB::DIV_F128, "_Q_div");
+ setLibcallName(RTLIB::SQRT_F128, "_Q_sqrt");
+ setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Q_qtoi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I32, "_Q_qtou");
+ setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Q_itoq");
+ setLibcallName(RTLIB::UINTTOFP_I32_F128, "_Q_utoq");
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Q_qtoll");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Q_qtoull");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Q_lltoq");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Q_ulltoq");
+ setLibcallName(RTLIB::FPEXT_F32_F128, "_Q_stoq");
+ setLibcallName(RTLIB::FPEXT_F64_F128, "_Q_dtoq");
+ setLibcallName(RTLIB::FPROUND_F128_F32, "_Q_qtos");
+ setLibcallName(RTLIB::FPROUND_F128_F64, "_Q_qtod");
+ }
+ }
+
setMinFunctionAlignment(2);
computeRegisterProperties();
@@ -1394,13 +1595,24 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::Lo: return "SPISD::Lo";
case SPISD::FTOI: return "SPISD::FTOI";
case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::FTOX: return "SPISD::FTOX";
+ case SPISD::XTOF: return "SPISD::XTOF";
case SPISD::CALL: return "SPISD::CALL";
case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
case SPISD::FLUSHW: return "SPISD::FLUSHW";
+ case SPISD::TLS_ADD: return "SPISD::TLS_ADD";
+ case SPISD::TLS_LD: return "SPISD::TLS_LD";
+ case SPISD::TLS_CALL: return "SPISD::TLS_CALL";
}
}
+EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
/// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
/// be zero. Op is expected to be a target specific node. Used by DAG
/// combiner.
@@ -1505,6 +1717,10 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT);
SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo);
+  // GLOBAL_BASE_REG is codegen'ed with a call; inform MFI that this
+  // function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setHasCalls(true);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
MachinePointerInfo::getGOT(), false, false, false, 0);
}
@@ -1513,6 +1729,7 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
switch(getTargetMachine().getCodeModel()) {
default:
llvm_unreachable("Unsupported absolute code model");
+ case CodeModel::JITDefault:
case CodeModel::Small:
// abs32.
return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
@@ -1549,23 +1766,430 @@ SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op,
return makeAddress(Op, DAG);
}
-static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ SDLoc DL(GA);
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
+ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) {
+ unsigned HiTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_HI22
+ : SPII::MO_TLS_LDM_HI22);
+ unsigned LoTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_LO10
+ : SPII::MO_TLS_LDM_LO10);
+ unsigned addTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_ADD
+ : SPII::MO_TLS_LDM_ADD);
+ unsigned callTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_CALL
+ : SPII::MO_TLS_LDM_CALL);
+
+ SDValue HiLo = makeHiLoPair(Op, HiTF, LoTF, DAG);
+ SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT);
+ SDValue Argument = DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Base, HiLo,
+ withTargetFlags(Op, addTF, DAG));
+
+ SDValue Chain = DAG.getEntryNode();
+ SDValue InFlag;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(1, true), DL);
+ Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InFlag);
+ InFlag = Chain.getValue(1);
+ SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT);
+ SDValue Symbol = withTargetFlags(Op, callTF, DAG);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ Ops.push_back(Symbol);
+ Ops.push_back(DAG.getRegister(SP::O0, PtrVT));
+ const uint32_t *Mask = getTargetMachine()
+ .getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, true),
+ DAG.getIntPtrConstant(0, true), InFlag, DL);
+ InFlag = Chain.getValue(1);
+ SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InFlag);
+
+ if (model != TLSModel::LocalDynamic)
+ return Ret;
+
+ SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT,
+ withTargetFlags(Op, SPII::MO_TLS_LDO_HIX22, DAG));
+ SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT,
+ withTargetFlags(Op, SPII::MO_TLS_LDO_LOX10, DAG));
+ HiLo = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo);
+ return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Ret, HiLo,
+ withTargetFlags(Op, SPII::MO_TLS_LDO_ADD, DAG));
+ }
+
+ if (model == TLSModel::InitialExec) {
+ unsigned ldTF = ((PtrVT == MVT::i64)? SPII::MO_TLS_IE_LDX
+ : SPII::MO_TLS_IE_LD);
+
+ SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT);
+
+    // GLOBAL_BASE_REG is codegen'ed with a call; inform MFI that this
+    // function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setHasCalls(true);
+
+ SDValue TGA = makeHiLoPair(Op,
+ SPII::MO_TLS_IE_HI22, SPII::MO_TLS_IE_LO10, DAG);
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, TGA);
+ SDValue Offset = DAG.getNode(SPISD::TLS_LD,
+ DL, PtrVT, Ptr,
+ withTargetFlags(Op, ldTF, DAG));
+ return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT,
+ DAG.getRegister(SP::G7, PtrVT), Offset,
+ withTargetFlags(Op, SPII::MO_TLS_IE_ADD, DAG));
+ }
+
+ assert(model == TLSModel::LocalExec);
+ SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT,
+ withTargetFlags(Op, SPII::MO_TLS_LE_HIX22, DAG));
+ SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT,
+ withTargetFlags(Op, SPII::MO_TLS_LE_LOX10, DAG));
+ SDValue Offset = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo);
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getRegister(SP::G7, PtrVT), Offset);
+}
+
+SDValue
+SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args,
+ SDValue Arg, SDLoc DL,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListEntry Entry;
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+
+ if (ArgTy->isFP128Ty()) {
+ // Create a stack object and pass the pointer to the library function.
+ int FI = MFI->CreateStackObject(16, 8, false);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ Chain = DAG.getStore(Chain,
+ DL,
+ Entry.Node,
+ FIPtr,
+ MachinePointerInfo(),
+ false,
+ false,
+ 8);
+
+ Entry.Node = FIPtr;
+ Entry.Ty = PointerType::getUnqual(ArgTy);
+ }
+ Args.push_back(Entry);
+ return Chain;
+}
+
+SDValue
+SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
+ const char *LibFuncName,
+ unsigned numArgs) const {
+
+ ArgListTy Args;
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+ SDValue Callee = DAG.getExternalSymbol(LibFuncName, getPointerTy());
+ Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+ Type *RetTyABI = RetTy;
+ SDValue Chain = DAG.getEntryNode();
+ SDValue RetPtr;
+
+ if (RetTy->isFP128Ty()) {
+ // Create a Stack Object to receive the return value of type f128.
+ ArgListEntry Entry;
+ int RetFI = MFI->CreateStackObject(16, 8, false);
+ RetPtr = DAG.getFrameIndex(RetFI, getPointerTy());
+ Entry.Node = RetPtr;
+ Entry.Ty = PointerType::getUnqual(RetTy);
+ if (!Subtarget->is64Bit())
+ Entry.isSRet = true;
+ Entry.isReturned = false;
+ Args.push_back(Entry);
+ RetTyABI = Type::getVoidTy(*DAG.getContext());
+ }
+
+ assert(Op->getNumOperands() >= numArgs && "Not enough operands!");
+ for (unsigned i = 0, e = numArgs; i != e; ++i) {
+ Chain = LowerF128_LibCallArg(Chain, Args, Op.getOperand(i), SDLoc(Op), DAG);
+ }
+ TargetLowering::
+ CallLoweringInfo CLI(Chain,
+ RetTyABI,
+ false, false, false, false,
+ 0, CallingConv::C,
+ false, false, true,
+ Callee, Args, DAG, SDLoc(Op));
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+  // The chain is the second result.
+ if (RetTyABI == RetTy)
+ return CallInfo.first;
+
+ assert (RetTy->isFP128Ty() && "Unexpected return type!");
+
+ Chain = CallInfo.second;
+
+ // Load RetPtr to get the return value.
+ return DAG.getLoad(Op.getValueType(),
+ SDLoc(Op),
+ Chain,
+ RetPtr,
+ MachinePointerInfo(),
+ false, false, false, 8);
+}
+
+SDValue
+SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
+ unsigned &SPCC,
+ SDLoc DL,
+ SelectionDAG &DAG) const {
+
+ const char *LibCall = 0;
+ bool is64Bit = Subtarget->is64Bit();
+ switch(SPCC) {
+ default: llvm_unreachable("Unhandled conditional code!");
+ case SPCC::FCC_E : LibCall = is64Bit? "_Qp_feq" : "_Q_feq"; break;
+ case SPCC::FCC_NE : LibCall = is64Bit? "_Qp_fne" : "_Q_fne"; break;
+ case SPCC::FCC_L : LibCall = is64Bit? "_Qp_flt" : "_Q_flt"; break;
+ case SPCC::FCC_G : LibCall = is64Bit? "_Qp_fgt" : "_Q_fgt"; break;
+ case SPCC::FCC_LE : LibCall = is64Bit? "_Qp_fle" : "_Q_fle"; break;
+ case SPCC::FCC_GE : LibCall = is64Bit? "_Qp_fge" : "_Q_fge"; break;
+ case SPCC::FCC_UL :
+ case SPCC::FCC_ULE:
+ case SPCC::FCC_UG :
+ case SPCC::FCC_UGE:
+ case SPCC::FCC_U :
+ case SPCC::FCC_O :
+ case SPCC::FCC_LG :
+ case SPCC::FCC_UE : LibCall = is64Bit? "_Qp_cmp" : "_Q_cmp"; break;
+ }
+
+ SDValue Callee = DAG.getExternalSymbol(LibCall, getPointerTy());
+ Type *RetTy = Type::getInt32Ty(*DAG.getContext());
+ ArgListTy Args;
+ SDValue Chain = DAG.getEntryNode();
+ Chain = LowerF128_LibCallArg(Chain, Args, LHS, DL, DAG);
+ Chain = LowerF128_LibCallArg(Chain, Args, RHS, DL, DAG);
+
+ TargetLowering::
+ CallLoweringInfo CLI(Chain,
+ RetTy,
+ false, false, false, false,
+ 0, CallingConv::C,
+ false, false, true,
+ Callee, Args, DAG, DL);
+
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+  // The comparison result is the first value; the chain is the second.
+ SDValue Result = CallInfo.first;
+
+ switch(SPCC) {
+ default: {
+ SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_UL : {
+ SDValue Mask = DAG.getTargetConstant(1, Result.getValueType());
+ Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
+ SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_ULE: {
+ SDValue RHS = DAG.getTargetConstant(2, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_UG : {
+ SDValue RHS = DAG.getTargetConstant(1, Result.getValueType());
+ SPCC = SPCC::ICC_G;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_UGE: {
+ SDValue RHS = DAG.getTargetConstant(1, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+
+ case SPCC::FCC_U : {
+ SDValue RHS = DAG.getTargetConstant(3, Result.getValueType());
+ SPCC = SPCC::ICC_E;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_O : {
+ SDValue RHS = DAG.getTargetConstant(3, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_LG : {
+ SDValue Mask = DAG.getTargetConstant(3, Result.getValueType());
+ Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
+ SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_UE : {
+ SDValue Mask = DAG.getTargetConstant(3, Result.getValueType());
+ Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
+ SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SPCC = SPCC::ICC_E;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ }
+}
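For the unordered conditions the lowering above calls _Q_cmp/_Qp_cmp and then re-compares the integer result. The constants used as masks and compare operands suggest an encoding of 0 = equal, 1 = less, 2 = greater, 3 = unordered; assuming that encoding (it is not spelled out in the patch), the re-comparisons for a subset of the cases correspond to the predicates below.

    // Hedged decoding sketch; covers only the cases that go through
    // _Q_cmp/_Qp_cmp, under the assumed 0/1/2/3 result encoding.
    #include <cassert>

    bool decodeUL (int r) { return (r & 1) != 0; } // FCC_UL : less or unordered
    bool decodeULE(int r) { return r != 2; }       // FCC_ULE: anything but greater
    bool decodeUG (int r) { return r > 1; }        // FCC_UG : greater or unordered
    bool decodeUGE(int r) { return r != 1; }       // FCC_UGE: anything but less
    bool decodeU  (int r) { return r == 3; }       // FCC_U  : unordered only
    bool decodeO  (int r) { return r != 3; }       // FCC_O  : ordered only

    int main() {
      assert(decodeUL(1) && decodeUL(3) && !decodeUL(2));
      assert(decodeU(3) && !decodeU(0) && decodeO(0));
      return 0;
    }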
+
+static SDValue
+LowerF128_FPEXTEND(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI) {
+
+ if (Op.getOperand(0).getValueType() == MVT::f64)
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(RTLIB::FPEXT_F64_F128), 1);
+
+ if (Op.getOperand(0).getValueType() == MVT::f32)
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(RTLIB::FPEXT_F32_F128), 1);
+
+ llvm_unreachable("fpextend with non-float operand!");
+ return SDValue(0, 0);
+}
+
+static SDValue
+LowerF128_FPROUND(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI) {
+ // FP_ROUND on f64 and f32 are legal.
+ if (Op.getOperand(0).getValueType() != MVT::f128)
+ return Op;
+
+ if (Op.getValueType() == MVT::f64)
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(RTLIB::FPROUND_F128_F64), 1);
+ if (Op.getValueType() == MVT::f32)
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(RTLIB::FPROUND_F128_F32), 1);
+
+ llvm_unreachable("fpround to non-float!");
+ return SDValue(0, 0);
+}
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ assert(VT == MVT::i32 || VT == MVT::i64);
+
+  // Expand f128 operations to fp128 ABI calls.
+ if (Op.getOperand(0).getValueType() == MVT::f128
+ && (!hasHardQuad || !TLI.isTypeLegal(VT))) {
+ const char *libName = TLI.getLibcallName(VT == MVT::i32
+ ? RTLIB::FPTOSINT_F128_I32
+ : RTLIB::FPTOSINT_F128_I64);
+ return TLI.LowerF128Op(Op, DAG, libName, 1);
+ }
+
+ // Expand if the resulting type is illegal.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue(0, 0);
+
+  // Otherwise, convert the FP value to an integer in an FP register.
+ if (VT == MVT::i32)
+ Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+ else
+ Op = DAG.getNode(SPISD::FTOX, dl, MVT::f64, Op.getOperand(0));
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
+ SDLoc dl(Op);
+ EVT OpVT = Op.getOperand(0).getValueType();
+  assert(OpVT == MVT::i32 || OpVT == MVT::i64);
+
+ EVT floatVT = (OpVT == MVT::i32) ? MVT::f32 : MVT::f64;
+
+ // Expand f128 operations to fp128 ABI calls.
+ if (Op.getValueType() == MVT::f128
+ && (!hasHardQuad || !TLI.isTypeLegal(OpVT))) {
+ const char *libName = TLI.getLibcallName(OpVT == MVT::i32
+ ? RTLIB::SINTTOFP_I32_F128
+ : RTLIB::SINTTOFP_I64_F128);
+ return TLI.LowerF128Op(Op, DAG, libName, 1);
+ }
+
+ // Expand if the operand type is illegal.
+ if (!TLI.isTypeLegal(OpVT))
+ return SDValue(0, 0);
+
+  // Otherwise, convert the integer value to FP in an FP register.
+ SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0));
+ unsigned opcode = (OpVT == MVT::i32)? SPISD::ITOF : SPISD::XTOF;
+ return DAG.getNode(opcode, dl, Op.getValueType(), Tmp);
+}
+
+static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
SDLoc dl(Op);
- // Convert the fp value to integer in an FP register.
- assert(Op.getValueType() == MVT::i32);
- Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+ EVT VT = Op.getValueType();
+
+  // Expand if this does not involve f128, or if the target supports quad
+  // floating-point instructions and the result type is legal.
+ if (Op.getOperand(0).getValueType() != MVT::f128 ||
+ (hasHardQuad && TLI.isTypeLegal(VT)))
+ return SDValue(0, 0);
+
+ assert(VT == MVT::i32 || VT == MVT::i64);
+
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(VT == MVT::i32
+ ? RTLIB::FPTOUINT_F128_I32
+ : RTLIB::FPTOUINT_F128_I64),
+ 1);
}
-static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
SDLoc dl(Op);
- assert(Op.getOperand(0).getValueType() == MVT::i32);
- SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
- // Convert the int value to FP in an FP register.
- return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+ EVT OpVT = Op.getOperand(0).getValueType();
+ assert(OpVT == MVT::i32 || OpVT == MVT::i64);
+
+  // Expand if this does not involve f128, or if the target supports quad
+  // floating-point instructions and the operand type is legal.
+ if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT)))
+ return SDValue(0, 0);
+
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(OpVT == MVT::i32
+ ? RTLIB::UINTTOFP_I32_F128
+ : RTLIB::UINTTOFP_I64_F128),
+ 1);
}
-static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
@@ -1586,15 +2210,23 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
// 32-bit compares use the icc flags, 64-bit uses the xcc flags.
Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
} else {
- CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
- if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
- Opc = SPISD::BRFCC;
+ if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
+ Opc = SPISD::BRICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
}
return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
DAG.getConstant(SPCC, MVT::i32), CompareFlag);
}
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -1614,9 +2246,15 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
SPISD::SELECT_ICC : SPISD::SELECT_XCC;
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
} else {
- CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
- Opc = SPISD::SELECT_FCC;
- if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
+ Opc = SPISD::SELECT_ICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
}
return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
DAG.getConstant(SPCC, MVT::i32), CompareFlag);
@@ -1665,20 +2303,25 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8);
}
-static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const SparcSubtarget *Subtarget) {
SDValue Chain = Op.getOperand(0); // Legalize the chain.
SDValue Size = Op.getOperand(1); // Legalize the size.
+ EVT VT = Size->getValueType(0);
SDLoc dl(Op);
unsigned SPReg = SP::O6;
- SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
- SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
// The resultant pointer is actually 16 words from the bottom of the stack,
// to provide a register spill area.
- SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
- DAG.getConstant(96, MVT::i32));
+ unsigned regSpillArea = Subtarget->is64Bit() ? 128 : 96;
+ regSpillArea += Subtarget->getStackPointerBias();
+
+ SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
+ DAG.getConstant(regSpillArea, VT));
SDValue Ops[2] = { NewVal, Chain };
return DAG.getMergeValues(Ops, 2, dl);
}
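The returned pointer is therefore offset from the new stack pointer by the reserved register spill area plus the stack-pointer bias. Written out as plain arithmetic below; the 2047 value is an assumption based on the usual SPARC V9 bias, since the patch only calls getStackPointerBias() without showing the constant.

    // Hedged sketch of the offset applied to the DYNAMIC_STACKALLOC result.
    #include <cassert>

    unsigned allocaResultOffset(bool is64Bit, unsigned stackPointerBias) {
      unsigned regSpillArea = is64Bit ? 128 : 96;   // reserved area above %sp
      return regSpillArea + stackPointerBias;
    }

    int main() {
      assert(allocaResultOffset(false, 0) == 96);      // 32-bit: no bias
      assert(allocaResultOffset(true, 2047) == 2175);  // assumed V9 bias
      return 0;
    }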
@@ -1759,12 +2402,12 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
return RetAddr;
}
-static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG)
+static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG, unsigned opcode)
{
SDLoc dl(Op);
assert(Op.getValueType() == MVT::f64 && "LowerF64Op called on non-double!");
- assert(Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS);
+ assert(opcode == ISD::FNEG || opcode == ISD::FABS);
// Lower fneg/fabs on f64 to fneg/fabs on f32.
// fneg f64 => fneg f32:sub_even, fmov f32:sub_odd.
@@ -1776,7 +2419,7 @@ static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG)
SDValue Lo32 = DAG.getTargetExtractSubreg(SP::sub_odd, dl, MVT::f32,
SrcReg64);
- Hi32 = DAG.getNode(Op.getOpcode(), dl, MVT::f32, Hi32);
+ Hi32 = DAG.getNode(opcode, dl, MVT::f32, Hi32);
SDValue DstReg64 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f64), 0);
@@ -1787,28 +2430,244 @@ static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG)
return DstReg64;
}
+// Lower an f128 load into two f64 loads.
+static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
+{
+ SDLoc dl(Op);
+ LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
+ assert(LdNode && LdNode->getOffset().getOpcode() == ISD::UNDEF
+ && "Unexpected node type");
+
+ unsigned alignment = LdNode->getAlignment();
+ if (alignment > 8)
+ alignment = 8;
+
+ SDValue Hi64 = DAG.getLoad(MVT::f64,
+ dl,
+ LdNode->getChain(),
+ LdNode->getBasePtr(),
+ LdNode->getPointerInfo(),
+ false, false, false, alignment);
+ EVT addrVT = LdNode->getBasePtr().getValueType();
+ SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
+ LdNode->getBasePtr(),
+ DAG.getConstant(8, addrVT));
+ SDValue Lo64 = DAG.getLoad(MVT::f64,
+ dl,
+ LdNode->getChain(),
+ LoPtr,
+ LdNode->getPointerInfo(),
+ false, false, false, alignment);
+
+ SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, MVT::i32);
+
+ SDNode *InFP128 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, MVT::f128);
+ InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ MVT::f128,
+ SDValue(InFP128, 0),
+ Hi64,
+ SubRegEven);
+ InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ MVT::f128,
+ SDValue(InFP128, 0),
+ Lo64,
+ SubRegOdd);
+ SDValue OutChains[2] = { SDValue(Hi64.getNode(), 1),
+ SDValue(Lo64.getNode(), 1) };
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], 2);
+ SDValue Ops[2] = {SDValue(InFP128,0), OutChain};
+ return DAG.getMergeValues(Ops, 2, dl);
+}
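LowerF128Load (and LowerF128Store below) split one 16-byte access into two 8-byte accesses: the half the code names Hi64 at the base address and Lo64 at base+8, with the original alignment clamped to 8. A small host-side sketch of the same address arithmetic, with nothing SPARC-specific assumed:

    #include <cassert>
    #include <cstring>

    // Read a 16-byte value as two 8-byte halves at offsets 0 and 8,
    // mirroring the Hi64/Lo64 loads built above.
    void load_as_two_halves(const void *base, double &hi, double &lo) {
      std::memcpy(&hi, base, 8);                                  // offset 0
      std::memcpy(&lo, static_cast<const char *>(base) + 8, 8);   // offset 8
    }

    int main() {
      alignas(8) unsigned char buf[16];
      double h = 1.5, l = -2.25;
      std::memcpy(buf, &h, 8);
      std::memcpy(buf + 8, &l, 8);
      double hi, lo;
      load_as_two_halves(buf, hi, lo);
      assert(hi == 1.5 && lo == -2.25);
      return 0;
    }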
+
+// Lower an f128 store into two f64 stores.
+static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
+ assert(StNode && StNode->getOffset().getOpcode() == ISD::UNDEF
+ && "Unexpected node type");
+ SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, MVT::i32);
+
+ SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl,
+ MVT::f64,
+ StNode->getValue(),
+ SubRegEven);
+ SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl,
+ MVT::f64,
+ StNode->getValue(),
+ SubRegOdd);
+
+ unsigned alignment = StNode->getAlignment();
+ if (alignment > 8)
+ alignment = 8;
+
+ SDValue OutChains[2];
+ OutChains[0] = DAG.getStore(StNode->getChain(),
+ dl,
+ SDValue(Hi64, 0),
+ StNode->getBasePtr(),
+ MachinePointerInfo(),
+ false, false, alignment);
+ EVT addrVT = StNode->getBasePtr().getValueType();
+ SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
+ StNode->getBasePtr(),
+ DAG.getConstant(8, addrVT));
+ OutChains[1] = DAG.getStore(StNode->getChain(),
+ dl,
+ SDValue(Lo64, 0),
+ LoPtr,
+ MachinePointerInfo(),
+ false, false, alignment);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], 2);
+}
+
+static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool is64Bit) {
+ if (Op.getValueType() == MVT::f64)
+ return LowerF64Op(Op, DAG, ISD::FNEG);
+ if (Op.getValueType() == MVT::f128)
+ return TLI.LowerF128Op(Op, DAG, ((is64Bit) ? "_Qp_neg" : "_Q_neg"), 1);
+ return Op;
+}
+
+static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
+ if (Op.getValueType() == MVT::f64)
+ return LowerF64Op(Op, DAG, ISD::FABS);
+ if (Op.getValueType() != MVT::f128)
+ return Op;
+
+ // Lower fabs on f128 to fabs on f64
+ // fabs f128 => fabs f64:sub_even64, fmov f64:sub_odd64
+
+ SDLoc dl(Op);
+ SDValue SrcReg128 = Op.getOperand(0);
+ SDValue Hi64 = DAG.getTargetExtractSubreg(SP::sub_even64, dl, MVT::f64,
+ SrcReg128);
+ SDValue Lo64 = DAG.getTargetExtractSubreg(SP::sub_odd64, dl, MVT::f64,
+ SrcReg128);
+ if (isV9)
+ Hi64 = DAG.getNode(Op.getOpcode(), dl, MVT::f64, Hi64);
+ else
+ Hi64 = LowerF64Op(Hi64, DAG, ISD::FABS);
+
+ SDValue DstReg128 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, MVT::f128), 0);
+ DstReg128 = DAG.getTargetInsertSubreg(SP::sub_even64, dl, MVT::f128,
+ DstReg128, Hi64);
+ DstReg128 = DAG.getTargetInsertSubreg(SP::sub_odd64, dl, MVT::f128,
+ DstReg128, Lo64);
+ return DstReg128;
+}
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+
+ if (Op.getValueType() != MVT::i64)
+ return Op;
+
+ SDLoc dl(Op);
+ SDValue Src1 = Op.getOperand(0);
+ SDValue Src1Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1);
+ SDValue Src1Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src1,
+ DAG.getConstant(32, MVT::i64));
+ Src1Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1Hi);
+
+ SDValue Src2 = Op.getOperand(1);
+ SDValue Src2Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2);
+ SDValue Src2Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src2,
+ DAG.getConstant(32, MVT::i64));
+ Src2Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2Hi);
+
+
+ bool hasChain = false;
+ unsigned hiOpc = Op.getOpcode();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case ISD::ADDC: hiOpc = ISD::ADDE; break;
+ case ISD::ADDE: hasChain = true; break;
+ case ISD::SUBC: hiOpc = ISD::SUBE; break;
+ case ISD::SUBE: hasChain = true; break;
+ }
+ SDValue Lo;
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Glue);
+ if (hasChain) {
+ Lo = DAG.getNode(Op.getOpcode(), dl, VTs, Src1Lo, Src2Lo,
+ Op.getOperand(2));
+ } else {
+ Lo = DAG.getNode(Op.getOpcode(), dl, VTs, Src1Lo, Src2Lo);
+ }
+ SDValue Hi = DAG.getNode(hiOpc, dl, VTs, Src1Hi, Src2Hi, Lo.getValue(1));
+ SDValue Carry = Hi.getValue(1);
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Lo);
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, MVT::i64, Hi,
+ DAG.getConstant(32, MVT::i64));
+
+ SDValue Dst = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, Lo);
+ SDValue Ops[2] = { Dst, Carry };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
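LowerADDC_ADDE_SUBC_SUBE splits a 64-bit carry-producing add or subtract into two 32-bit halves: the low halves go through the original opcode, the resulting carry feeds the high halves, and the pieces are zero-extended, shifted, and OR-ed back together. A plain C++ illustration of the same decomposition for an add (host code, not target code):

    #include <cassert>
    #include <cstdint>

    // Add the low words first, feed the carry into the high words,
    // then reassemble the 64-bit result.
    uint64_t add64_via_32(uint64_t a, uint64_t b) {
      uint32_t aLo = uint32_t(a), aHi = uint32_t(a >> 32);
      uint32_t bLo = uint32_t(b), bHi = uint32_t(b >> 32);

      uint32_t lo = aLo + bLo;                 // ADDCC: produces the carry
      uint32_t carry = lo < aLo ? 1 : 0;       // carry out of the low word
      uint32_t hi = aHi + bHi + carry;         // ADDX/ADDE: consumes the carry

      return (uint64_t(hi) << 32) | lo;        // zext, shl, or
    }

    int main() {
      assert(add64_via_32(0xFFFFFFFFull, 1) == 0x100000000ull);
      assert(add64_via_32(0x1234567890ull, 0xABCDEFull) ==
             0x1234567890ull + 0xABCDEFull);
      return 0;
    }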
+
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+
+ bool hasHardQuad = Subtarget->hasHardQuad();
+ bool is64Bit = Subtarget->is64Bit();
+ bool isV9 = Subtarget->isV9();
+
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
- case ISD::FNEG:
- case ISD::FABS: return LowerF64Op(Op, DAG);
-
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG, *this);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::GlobalTLSAddress:
- llvm_unreachable("TLS not implemented for Sparc.");
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
- case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
+ hasHardQuad);
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
case ISD::VAARG: return LowerVAARG(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
+ Subtarget);
+
+ case ISD::LOAD: return LowerF128Load(Op, DAG);
+ case ISD::STORE: return LowerF128Store(Op, DAG);
+ case ISD::FADD: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::ADD_F128), 2);
+ case ISD::FSUB: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::SUB_F128), 2);
+ case ISD::FMUL: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::MUL_F128), 2);
+ case ISD::FDIV: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::DIV_F128), 2);
+ case ISD::FSQRT: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::SQRT_F128),1);
+ case ISD::FNEG: return LowerFNEG(Op, DAG, *this, is64Bit);
+ case ISD::FABS: return LowerFABS(Op, DAG, isV9);
+ case ISD::FP_EXTEND: return LowerF128_FPEXTEND(Op, DAG, *this);
+ case ISD::FP_ROUND: return LowerF128_FPROUND(Op, DAG, *this);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
}
}
@@ -1825,11 +2684,13 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case SP::SELECT_CC_Int_ICC:
case SP::SELECT_CC_FP_ICC:
case SP::SELECT_CC_DFP_ICC:
+ case SP::SELECT_CC_QFP_ICC:
BROpcode = SP::BCOND;
break;
case SP::SELECT_CC_Int_FCC:
case SP::SELECT_CC_FP_FCC:
case SP::SELECT_CC_DFP_FCC:
+ case SP::SELECT_CC_QFP_FCC:
BROpcode = SP::FBCOND;
break;
}
@@ -1924,3 +2785,50 @@ SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Sparc target isn't yet aware of offsets.
return false;
}
+
+void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const {
+
+ SDLoc dl(N);
+
+ RTLIB::Libcall libCall = RTLIB::UNKNOWN_LIBCALL;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+    // Custom lower only if the operand is f128 and the result is i64.
+ if (N->getOperand(0).getValueType() != MVT::f128
+ || N->getValueType(0) != MVT::i64)
+ return;
+ libCall = ((N->getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::FPTOSINT_F128_I64
+ : RTLIB::FPTOUINT_F128_I64);
+
+ Results.push_back(LowerF128Op(SDValue(N, 0),
+ DAG,
+ getLibcallName(libCall),
+ 1));
+ return;
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+    // Custom lower only if the result is f128 and the operand is i64.
+ if (N->getValueType(0) != MVT::f128
+ || N->getOperand(0).getValueType() != MVT::i64)
+ return;
+
+ libCall = ((N->getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::SINTTOFP_I64_F128
+ : RTLIB::UINTTOFP_I64_F128);
+
+ Results.push_back(LowerF128Op(SDValue(N, 0),
+ DAG,
+ getLibcallName(libCall),
+ 1));
+ return;
+ }
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 261c25a..2659fc8 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -37,11 +37,17 @@ namespace llvm {
FTOI, // FP to Int within a FP register.
ITOF, // Int to FP within a FP register.
+ FTOX, // FP to Int64 within a FP register.
+ XTOF, // Int64 to FP within a FP register.
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
- GLOBAL_BASE_REG, // Global base reg for PIC
- FLUSHW // FLUSH register windows to stack
+ GLOBAL_BASE_REG, // Global base reg for PIC.
+ FLUSHW, // FLUSH register windows to stack.
+
+ TLS_ADD, // For Thread Local Storage (TLS).
+ TLS_LD,
+ TLS_CALL
};
}
@@ -73,6 +79,9 @@ namespace llvm {
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -119,6 +128,7 @@ namespace llvm {
SDLoc DL, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -127,6 +137,27 @@ namespace llvm {
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args,
+ SDValue Arg, SDLoc DL,
+ SelectionDAG &DAG) const;
+ SDValue LowerF128Op(SDValue Op, SelectionDAG &DAG,
+ const char *LibFuncName,
+ unsigned numArgs) const;
+ SDValue LowerF128Compare(SDValue LHS, SDValue RHS,
+ unsigned &SPCC,
+ SDLoc DL,
+ SelectionDAG &DAG) const;
+
+ bool ShouldShrinkFPConstant(EVT VT) const {
+      // Do not shrink an FP constant pool entry when VT == MVT::f128:
+      // (ldd, call _Q_fdtoq) is more expensive than two ldds.
+ return VT != MVT::f128;
+ }
+
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const;
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
index 47658ee..8656de5 100644
--- a/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -153,15 +153,11 @@ def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>;
def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>;
def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>;
-// Add/sub with carry were renamed to addc/subc in SPARC v9.
-def : Pat<(adde i64:$a, i64:$b), (ADDXrr $a, $b)>;
-def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>;
-
-def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>;
-def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
-
def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>;
+def : Pat<(tlsadd i64:$a, i64:$b, tglobaltlsaddr:$sym),
+ (TLS_ADDrr $a, $b, $sym)>;
+
// Register-immediate instructions.
def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>;
@@ -173,6 +169,14 @@ def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>;
def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
+def : Pat<(ctpop i64:$src), (POPCrr $src)>;
+
+// "LEA" form of add
+def LEAX_ADDri : F3_2<2, 0b000000,
+ (outs I64Regs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set iPTR:$dst, ADDRri:$addr)]>;
+
} // Predicates = [Is64Bit]
@@ -237,6 +241,12 @@ def LDXri : F3_2<3, 0b001011,
(outs I64Regs:$dst), (ins MEMri:$addr),
"ldx [$addr], $dst",
[(set i64:$dst, (load ADDRri:$addr))]>;
+let mayLoad = 1 in
+ def TLS_LDXrr : F3_1<3, 0b001011,
+ (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
+ "ldx [$addr], $dst, $sym",
+ [(set i64:$dst,
+ (tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>;
// Extending loads to i64.
def : Pat<(i64 (zextloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
@@ -312,9 +322,9 @@ def : Pat<(store (i64 0), ADDRri:$dst), (STXri ADDRri:$dst, (i64 G0))>;
let Predicates = [Is64Bit] in {
let Uses = [ICC] in
-def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
- "b$cc %xcc, $dst",
- [(SPbrxcc bb:$dst, imm:$cc)]>;
+def BPXCC : BranchSP<(ins brtarget:$imm22, CCOp:$cond),
+ "b$cond %xcc, $imm22",
+ [(SPbrxcc bb:$imm22, imm:$cond)]>;
// Conditional moves on %xcc.
let Uses = [ICC], Constraints = "$f = $rd" in {
@@ -340,6 +350,42 @@ def FMOVD_XCC : Pseudo<(outs DFPRegs:$rd),
(SPselectxcc f64:$rs2, f64:$f, imm:$cond))]>;
} // Uses, Constraints
+//===----------------------------------------------------------------------===//
+// 64-bit Floating Point Conversions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+def FXTOS : F3_3u<2, 0b110100, 0b010000100,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fxtos $src, $dst",
+ [(set FPRegs:$dst, (SPxtof DFPRegs:$src))]>;
+def FXTOD : F3_3u<2, 0b110100, 0b010001000,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fxtod $src, $dst",
+ [(set DFPRegs:$dst, (SPxtof DFPRegs:$src))]>;
+def FXTOQ : F3_3u<2, 0b110100, 0b010001100,
+ (outs QFPRegs:$dst), (ins DFPRegs:$src),
+ "fxtoq $src, $dst",
+ [(set QFPRegs:$dst, (SPxtof DFPRegs:$src))]>,
+ Requires<[HasHardQuad]>;
+
+def FSTOX : F3_3u<2, 0b110100, 0b010000001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fstox $src, $dst",
+ [(set DFPRegs:$dst, (SPftox FPRegs:$src))]>;
+def FDTOX : F3_3u<2, 0b110100, 0b010000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fdtox $src, $dst",
+ [(set DFPRegs:$dst, (SPftox DFPRegs:$src))]>;
+def FQTOX : F3_3u<2, 0b110100, 0b010000011,
+ (outs DFPRegs:$dst), (ins QFPRegs:$src),
+ "fqtox $src, $dst",
+ [(set DFPRegs:$dst, (SPftox QFPRegs:$src))]>,
+ Requires<[HasHardQuad]>;
+
+} // Predicates = [Is64Bit]
+
def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond),
(MOVXCCrr $t, $f, imm:$cond)>;
def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond),
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index 6cdf6bc..afa2874 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -47,12 +47,11 @@ class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{29-25} = rd;
}
-class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+class F2_2<bits<3> op2Val, dag outs, dag ins, string asmstr,
list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
bits<4> cond;
bit annul = 0; // currently unused
- let cond = condVal;
let op2 = op2Val;
let Inst{29} = annul;
@@ -112,6 +111,32 @@ class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
let Inst{4-0} = rs2;
}
+// floating-point unary operations.
+class F3_3u<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+ let rs1 = 0;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
+// floating-point compares.
+class F3_3c<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+ let rd = 0;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
// Shift by register rs2.
class F3_Sr<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
@@ -150,3 +175,59 @@ multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
!strconcat(OpcStr, " $rs, $shcnt, $rd"),
[(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>;
}
+
+class F4<bits<6> op3, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+
+ let op = 2;
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+}
+
+
+class F4_1<bits<6> op3, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : F4<op3, outs, ins, asmstr, pattern> {
+
+ bits<3> cc;
+ bits<4> cond;
+ bits<5> rs2;
+
+ let Inst{4-0} = rs2;
+ let Inst{11} = cc{0};
+ let Inst{12} = cc{1};
+ let Inst{13} = 0;
+ let Inst{17-14} = cond;
+ let Inst{18} = cc{2};
+
+}
+
+class F4_2<bits<6> op3, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : F4<op3, outs, ins, asmstr, pattern> {
+ bits<3> cc;
+ bits<4> cond;
+ bits<11> simm11;
+
+ let Inst{10-0} = simm11;
+ let Inst{11} = cc{0};
+ let Inst{12} = cc{1};
+ let Inst{13} = 1;
+ let Inst{17-14} = cond;
+ let Inst{18} = cc{2};
+}
+
+class F4_3<bits<6> op3, bits<6> opf_low, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : F4<op3, outs, ins, asmstr, pattern> {
+ bits<4> cond;
+ bits<3> opf_cc;
+ bits<5> rs2;
+
+ let Inst{18} = 0;
+ let Inst{17-14} = cond;
+ let Inst{13-11} = opf_cc;
+ let Inst{10-5} = opf_low;
+ let Inst{4-0} = rs2;
+}
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 6c14bc9..c10b5b3 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -24,11 +24,15 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "SparcGenInstrInfo.inc"
using namespace llvm;
+
+// Pin the vtable to this file.
+void SparcInstrInfo::anchor() {}
+
SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
: SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
RI(ST), Subtarget(ST) {
@@ -44,7 +48,8 @@ unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
if (MI->getOpcode() == SP::LDri ||
MI->getOpcode() == SP::LDXri ||
MI->getOpcode() == SP::LDFri ||
- MI->getOpcode() == SP::LDDFri) {
+ MI->getOpcode() == SP::LDDFri ||
+ MI->getOpcode() == SP::LDQFri) {
if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -64,7 +69,8 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
if (MI->getOpcode() == SP::STri ||
MI->getOpcode() == SP::STXri ||
MI->getOpcode() == SP::STFri ||
- MI->getOpcode() == SP::STDFri) {
+ MI->getOpcode() == SP::STDFri ||
+ MI->getOpcode() == SP::STQFri) {
if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
MI->getOperand(1).getImm() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -100,14 +106,14 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
case SPCC::FCC_U: return SPCC::FCC_O;
case SPCC::FCC_O: return SPCC::FCC_U;
- case SPCC::FCC_G: return SPCC::FCC_LE;
- case SPCC::FCC_LE: return SPCC::FCC_G;
- case SPCC::FCC_UG: return SPCC::FCC_ULE;
- case SPCC::FCC_ULE: return SPCC::FCC_UG;
- case SPCC::FCC_L: return SPCC::FCC_GE;
- case SPCC::FCC_GE: return SPCC::FCC_L;
- case SPCC::FCC_UL: return SPCC::FCC_UGE;
- case SPCC::FCC_UGE: return SPCC::FCC_UL;
+  case SPCC::FCC_G:     return SPCC::FCC_ULE;
+ case SPCC::FCC_LE: return SPCC::FCC_UG;
+ case SPCC::FCC_UG: return SPCC::FCC_LE;
+ case SPCC::FCC_ULE: return SPCC::FCC_G;
+ case SPCC::FCC_L: return SPCC::FCC_UGE;
+ case SPCC::FCC_GE: return SPCC::FCC_UL;
+ case SPCC::FCC_UL: return SPCC::FCC_GE;
+ case SPCC::FCC_UGE: return SPCC::FCC_L;
case SPCC::FCC_LG: return SPCC::FCC_UE;
case SPCC::FCC_UE: return SPCC::FCC_LG;
case SPCC::FCC_NE: return SPCC::FCC_E;
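The corrected table above pairs each ordered condition with its unordered counterpart (G with ULE, L with UGE, and so on), because negating an ordered comparison has to accept the unordered (NaN) case. A quick host-side check of that reasoning:

    #include <cassert>
    #include <cmath>

    int main() {
      double nan = std::nan("");
      assert(!(nan > 1.0));   // ordered greater-than is false when unordered
      assert(!(nan <= 1.0));  // ...but so is ordered less-or-equal, so the
                              // negation of FCC_G must be FCC_ULE, which also
                              // accepts the unordered case.
      return 0;
    }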
@@ -273,6 +279,16 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
+ unsigned numSubRegs = 0;
+ unsigned movOpc = 0;
+ const unsigned *subRegIdx = 0;
+
+ const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
+ const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
+ const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd,
+ SP::sub_odd64_then_sub_even,
+ SP::sub_odd64_then_sub_odd };
+
if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -285,23 +301,47 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
// Use two FMOVS instructions.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MachineInstr *MovMI = 0;
- unsigned subRegIdx[] = {SP::sub_even, SP::sub_odd};
- for (unsigned i = 0; i != 2; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
- unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
- assert(Dst && Src && "Bad sub-register");
-
- MovMI = BuildMI(MBB, I, DL, get(SP::FMOVS), Dst).addReg(Src);
+ subRegIdx = DFP_FP_SubRegsIdx;
+ numSubRegs = 2;
+ movOpc = SP::FMOVS;
+ }
+ } else if (SP::QFPRegsRegClass.contains(DestReg, SrcReg)) {
+ if (Subtarget.isV9()) {
+ if (Subtarget.hasHardQuad()) {
+ BuildMI(MBB, I, DL, get(SP::FMOVQ), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ // Use two FMOVD instructions.
+ subRegIdx = QFP_DFP_SubRegsIdx;
+ numSubRegs = 2;
+ movOpc = SP::FMOVD;
}
- // Add implicit super-register defs and kills to the last MovMI.
- MovMI->addRegisterDefined(DestReg, TRI);
- if (KillSrc)
- MovMI->addRegisterKilled(SrcReg, TRI);
+ } else {
+ // Use four FMOVS instructions.
+ subRegIdx = QFP_FP_SubRegsIdx;
+ numSubRegs = 4;
+ movOpc = SP::FMOVS;
}
} else
llvm_unreachable("Impossible reg-to-reg copy");
+
+ if (numSubRegs == 0 || subRegIdx == 0 || movOpc == 0)
+ return;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstr *MovMI = 0;
+
+ for (unsigned i = 0; i != numSubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
+ unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
+ assert(Dst && Src && "Bad sub-register");
+
+ MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src);
+ }
+ // Add implicit super-register defs and kills to the last MovMI.
+ MovMI->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ MovMI->addRegisterKilled(SrcReg, TRI);
}
void SparcInstrInfo::
@@ -321,7 +361,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectAlignment(FI));
// On the order of operands here: think "[FrameIdx + 0] = SrcReg".
- if (RC == &SP::I64RegsRegClass)
+ if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::IntRegsRegClass)
@@ -330,9 +370,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
- else if (RC == &SP::DFPRegsRegClass)
+ else if (SP::DFPRegsRegClass.hasSubClassEq(RC))
BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (SP::QFPRegsRegClass.hasSubClassEq(RC))
+ // Use STQFri irrespective of its legality. If STQ is not legal, it will be
+ // lowered into two STDs in eliminateFrameIndex.
+ BuildMI(MBB, I, DL, get(SP::STQFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else
llvm_unreachable("Can't store this register to stack slot");
}
@@ -362,9 +407,14 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
- else if (RC == &SP::DFPRegsRegClass)
+ else if (SP::DFPRegsRegClass.hasSubClassEq(RC))
BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
+ else if (SP::QFPRegsRegClass.hasSubClassEq(RC))
+ // Use LDQFri irrespective of its legality. If LDQ is not legal, it will be
+ // lowered into two LDDs in eliminateFrameIndex.
+ BuildMI(MBB, I, DL, get(SP::LDQFri), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
else
llvm_unreachable("Can't load this register from stack slot");
}
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index d0b220b..a86cbcb 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -37,6 +37,7 @@ namespace SPII {
class SparcInstrInfo : public SparcGenInstrInfo {
const SparcRegisterInfo RI;
const SparcSubtarget& Subtarget;
+ virtual void anchor();
public:
explicit SparcInstrInfo(SparcSubtarget &ST);
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index d4cac4d..ef7a114 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -39,6 +39,10 @@ def HasNoV9 : Predicate<"!Subtarget.isV9()">;
// HasVIS - This is true when the target processor has VIS extensions.
def HasVIS : Predicate<"Subtarget.isVIS()">;
+// HasHardQuad - This is true when the target processor supports quad floating
+// point instructions.
+def HasHardQuad : Predicate<"Subtarget.hasHardQuad()">;
+
// UseDeprecatedInsts - This predicate is true when the target processor is a
// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
// to use when appropriate. In either of these cases, the instruction selector
@@ -81,6 +85,8 @@ def MEMri : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc, i32imm);
}
+def TLSSym : Operand<iPTR>;
+
// Branch targets have OtherVT type.
def brtarget : Operand<OtherVT>;
def calltarget : Operand<i32>;
@@ -101,6 +107,15 @@ def SDTSPFTOI :
SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
def SDTSPITOF :
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDTSPFTOX :
+SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisFP<1>]>;
+def SDTSPXTOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f64>]>;
+
+def SDTSPtlsadd :
+SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDTSPtlsld :
+SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
@@ -113,6 +128,8 @@ def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+def SPftox : SDNode<"SPISD::FTOX", SDTSPFTOX>;
+def SPxtof : SDNode<"SPISD::XTOF", SDTSPXTOF>;
def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
@@ -140,6 +157,12 @@ def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
[SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
+def tlsadd : SDNode<"SPISD::TLS_ADD", SDTSPtlsadd>;
+def tlsld : SDNode<"SPISD::TLS_LD", SDTSPtlsld>;
+def tlscall : SDNode<"SPISD::TLS_CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def getPCX : Operand<i32> {
let PrintMethod = "printGetPCX";
}
@@ -242,8 +265,9 @@ let hasSideEffects = 1, mayStore = 1 in {
[(flushw)]>;
}
-def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
- "unimp $val", []>;
+let rd = 0 in
+ def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
+ "unimp $val", []>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence. This has to handle all
@@ -263,6 +287,11 @@ let Uses = [ICC], usesCustomInserter = 1 in {
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_ICC PSEUDO!",
[(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>;
+
+ def SELECT_CC_QFP_ICC
+ : Pseudo<(outs QFPRegs:$dst), (ins QFPRegs:$T, QFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_QFP_ICC PSEUDO!",
+ [(set f128:$dst, (SPselecticc f128:$T, f128:$F, imm:$Cond))]>;
}
let usesCustomInserter = 1, Uses = [FCC] in {
@@ -280,17 +309,21 @@ let usesCustomInserter = 1, Uses = [FCC] in {
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_FCC PSEUDO!",
[(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>;
+ def SELECT_CC_QFP_FCC
+ : Pseudo<(outs QFPRegs:$dst), (ins QFPRegs:$T, QFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_QFP_FCC PSEUDO!",
+ [(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>;
}
// Section A.3 - Synthetic Instructions, p. 85
// special cases of JMPL:
let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
- let rd = O7.Num, rs1 = G0.Num in
+ let rd = 0, rs1 = 15 in
def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
"jmp %o7+$val", [(retflag simm13:$val)]>;
- let rd = I7.Num, rs1 = G0.Num in
+ let rd = 0, rs1 = 31 in
def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
"jmp %i7+$val", []>;
}
@@ -354,56 +387,76 @@ def LDDFri : F3_2<3, 0b100011,
(outs DFPRegs:$dst), (ins MEMri:$addr),
"ldd [$addr], $dst",
[(set f64:$dst, (load ADDRri:$addr))]>;
+def LDQFrr : F3_1<3, 0b100010,
+ (outs QFPRegs:$dst), (ins MEMrr:$addr),
+ "ldq [$addr], $dst",
+ [(set f128:$dst, (load ADDRrr:$addr))]>,
+ Requires<[HasV9, HasHardQuad]>;
+def LDQFri : F3_2<3, 0b100010,
+ (outs QFPRegs:$dst), (ins MEMri:$addr),
+ "ldq [$addr], $dst",
+ [(set f128:$dst, (load ADDRri:$addr))]>,
+ Requires<[HasV9, HasHardQuad]>;
// Section B.4 - Store Integer Instructions, p. 95
def STBrr : F3_1<3, 0b000101,
- (outs), (ins MEMrr:$addr, IntRegs:$src),
- "stb $src, [$addr]",
- [(truncstorei8 i32:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, IntRegs:$rd),
+ "stb $rd, [$addr]",
+ [(truncstorei8 i32:$rd, ADDRrr:$addr)]>;
def STBri : F3_2<3, 0b000101,
- (outs), (ins MEMri:$addr, IntRegs:$src),
- "stb $src, [$addr]",
- [(truncstorei8 i32:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, IntRegs:$rd),
+ "stb $rd, [$addr]",
+ [(truncstorei8 i32:$rd, ADDRri:$addr)]>;
def STHrr : F3_1<3, 0b000110,
- (outs), (ins MEMrr:$addr, IntRegs:$src),
- "sth $src, [$addr]",
- [(truncstorei16 i32:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, IntRegs:$rd),
+ "sth $rd, [$addr]",
+ [(truncstorei16 i32:$rd, ADDRrr:$addr)]>;
def STHri : F3_2<3, 0b000110,
- (outs), (ins MEMri:$addr, IntRegs:$src),
- "sth $src, [$addr]",
- [(truncstorei16 i32:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, IntRegs:$rd),
+ "sth $rd, [$addr]",
+ [(truncstorei16 i32:$rd, ADDRri:$addr)]>;
def STrr : F3_1<3, 0b000100,
- (outs), (ins MEMrr:$addr, IntRegs:$src),
- "st $src, [$addr]",
- [(store i32:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, IntRegs:$rd),
+ "st $rd, [$addr]",
+ [(store i32:$rd, ADDRrr:$addr)]>;
def STri : F3_2<3, 0b000100,
- (outs), (ins MEMri:$addr, IntRegs:$src),
- "st $src, [$addr]",
- [(store i32:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, IntRegs:$rd),
+ "st $rd, [$addr]",
+ [(store i32:$rd, ADDRri:$addr)]>;
// Section B.5 - Store Floating-point Instructions, p. 97
def STFrr : F3_1<3, 0b100100,
- (outs), (ins MEMrr:$addr, FPRegs:$src),
- "st $src, [$addr]",
- [(store f32:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, FPRegs:$rd),
+ "st $rd, [$addr]",
+ [(store f32:$rd, ADDRrr:$addr)]>;
def STFri : F3_2<3, 0b100100,
- (outs), (ins MEMri:$addr, FPRegs:$src),
- "st $src, [$addr]",
- [(store f32:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, FPRegs:$rd),
+ "st $rd, [$addr]",
+ [(store f32:$rd, ADDRri:$addr)]>;
def STDFrr : F3_1<3, 0b100111,
- (outs), (ins MEMrr:$addr, DFPRegs:$src),
- "std $src, [$addr]",
- [(store f64:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, DFPRegs:$rd),
+ "std $rd, [$addr]",
+ [(store f64:$rd, ADDRrr:$addr)]>;
def STDFri : F3_2<3, 0b100111,
- (outs), (ins MEMri:$addr, DFPRegs:$src),
- "std $src, [$addr]",
- [(store f64:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, DFPRegs:$rd),
+ "std $rd, [$addr]",
+ [(store f64:$rd, ADDRri:$addr)]>;
+def STQFrr : F3_1<3, 0b100110,
+ (outs), (ins MEMrr:$addr, QFPRegs:$rd),
+ "stq $rd, [$addr]",
+ [(store f128:$rd, ADDRrr:$addr)]>,
+ Requires<[HasV9, HasHardQuad]>;
+def STQFri : F3_2<3, 0b100110,
+ (outs), (ins MEMri:$addr, QFPRegs:$rd),
+ "stq $rd, [$addr]",
+ [(store f128:$rd, ADDRri:$addr)]>,
+ Requires<[HasV9, HasHardQuad]>;
// Section B.9 - SETHI Instruction, p. 104
def SETHIi: F2_1<0b100,
- (outs IntRegs:$dst), (ins i32imm:$src),
- "sethi $src, $dst",
- [(set i32:$dst, SETHIimm:$src)]>;
+ (outs IntRegs:$rd), (ins i32imm:$imm22),
+ "sethi $imm22, $rd",
+ [(set i32:$rd, SETHIimm:$imm22)]>;
// Section B.10 - NOP Instruction, p. 105
// (It's a special case of SETHI)
@@ -449,30 +502,32 @@ defm SRA : F3_12<"sra", 0b100111, sra>;
defm ADD : F3_12<"add", 0b000000, add>;
// "LEA" forms of add (patterns to make tblgen happy)
-def LEA_ADDri : F3_2<2, 0b000000,
- (outs IntRegs:$dst), (ins MEMri:$addr),
- "add ${addr:arith}, $dst",
- [(set iPTR:$dst, ADDRri:$addr)]>;
+let Predicates = [Is32Bit] in
+ def LEA_ADDri : F3_2<2, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set iPTR:$dst, ADDRri:$addr)]>;
let Defs = [ICC] in
defm ADDCC : F3_12<"addcc", 0b010000, addc>;
-let Uses = [ICC] in
- defm ADDX : F3_12<"addx", 0b001000, adde>;
+let Uses = [ICC], Defs = [ICC] in
+ defm ADDX : F3_12<"addxcc", 0b011000, adde>;
// Section B.15 - Subtract Instructions, p. 110
defm SUB : F3_12 <"sub" , 0b000100, sub>;
-let Uses = [ICC] in
- defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+let Uses = [ICC], Defs = [ICC] in
+ defm SUBX : F3_12 <"subxcc" , 0b011100, sube>;
-let Defs = [ICC] in {
+let Defs = [ICC] in
defm SUBCC : F3_12 <"subcc", 0b010100, subc>;
+let Defs = [ICC], rd = 0 in {
def CMPrr : F3_1<2, 0b010100,
(outs), (ins IntRegs:$b, IntRegs:$c),
"cmp $b, $c",
[(SPcmpicc i32:$b, i32:$c)]>;
- def CMPri : F3_1<2, 0b010100,
+ def CMPri : F3_2<2, 0b010100,
(outs), (ins IntRegs:$b, i32imm:$c),
"cmp $b, $c",
[(SPcmpicc i32:$b, (i32 simm13:$c))]>;
@@ -502,23 +557,30 @@ defm RESTORE : F3_12np<"restore", 0b111101>;
// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+// unconditional branch class.
+class BranchAlways<dag ins, string asmstr, list<dag> pattern>
+ : F2_2<0b010, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let isBarrier = 1;
+}
+
+let cond = 8 in
+ def BA : BranchAlways<(ins brtarget:$imm22), "ba $imm22", [(br bb:$imm22)]>;
+
// conditional branch class:
-class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
- : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+class BranchSP<dag ins, string asmstr, list<dag> pattern>
+ : F2_2<0b010, (outs), ins, asmstr, pattern> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
}
-let isBarrier = 1 in
- def BA : BranchSP<0b1000, (ins brtarget:$dst),
- "ba $dst",
- [(br bb:$dst)]>;
-
// Indirect branch instructions.
let isTerminator = 1, isBarrier = 1,
hasDelaySlot = 1, isBranch =1,
- isIndirectBranch = 1 in {
+ isIndirectBranch = 1, rd = 0 in {
def BINDrr : F3_1<2, 0b111000,
(outs), (ins MEMrr:$ptr),
"jmp $ptr",
@@ -529,37 +591,31 @@ let isTerminator = 1, isBarrier = 1,
[(brind ADDRri:$ptr)]>;
}
-// FIXME: the encoding for the JIT should look at the condition field.
let Uses = [ICC] in
- def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
- "b$cc $dst",
- [(SPbricc bb:$dst, imm:$cc)]>;
-
+ def BCOND : BranchSP<(ins brtarget:$imm22, CCOp:$cond),
+ "b$cond $imm22",
+ [(SPbricc bb:$imm22, imm:$cond)]>;
// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
// floating-point conditional branch class:
-class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
- : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+class FPBranchSP<dag ins, string asmstr, list<dag> pattern>
+ : F2_2<0b110, (outs), ins, asmstr, pattern> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
}
-// FIXME: the encoding for the JIT should look at the condition field.
let Uses = [FCC] in
- def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
- "fb$cc $dst",
- [(SPbrfcc bb:$dst, imm:$cc)]>;
+ def FBCOND : FPBranchSP<(ins brtarget:$imm22, CCOp:$cond),
+ "fb$cond $imm22",
+ [(SPbrfcc bb:$imm22, imm:$cond)]>;
// Section B.24 - Call and Link Instruction, p. 125
// This is the only Format 1 instruction
let Uses = [O6],
- hasDelaySlot = 1, isCall = 1,
- Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
- D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
- ICC, FCC, Y] in {
+ hasDelaySlot = 1, isCall = 1 in {
def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
"call $dst", []> {
bits<30> disp;
@@ -571,21 +627,21 @@ let Uses = [O6],
def JMPLrr : F3_1<2, 0b111000,
(outs), (ins MEMrr:$ptr, variable_ops),
"call $ptr",
- [(call ADDRrr:$ptr)]>;
+ [(call ADDRrr:$ptr)]> { let rd = 15; }
def JMPLri : F3_2<2, 0b111000,
(outs), (ins MEMri:$ptr, variable_ops),
"call $ptr",
- [(call ADDRri:$ptr)]>;
+ [(call ADDRri:$ptr)]> { let rd = 15; }
}
// Section B.28 - Read State Register Instructions
-let Uses = [Y] in
+let Uses = [Y], rs1 = 0, rs2 = 0 in
def RDY : F3_1<2, 0b101000,
(outs IntRegs:$dst), (ins),
"rd %y, $dst", []>;
// Section B.29 - Write State Register Instructions
-let Defs = [Y] in {
+let Defs = [Y], rd = 0 in {
def WRYrr : F3_1<2, 0b110000,
(outs), (ins IntRegs:$b, IntRegs:$c),
"wr $b, $c, %y", []>;
@@ -594,58 +650,93 @@ let Defs = [Y] in {
"wr $b, $c, %y", []>;
}
// Convert Integer to Floating-point Instructions, p. 141
-def FITOS : F3_3<2, 0b110100, 0b011000100,
+def FITOS : F3_3u<2, 0b110100, 0b011000100,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fitos $src, $dst",
[(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
-def FITOD : F3_3<2, 0b110100, 0b011001000,
+def FITOD : F3_3u<2, 0b110100, 0b011001000,
(outs DFPRegs:$dst), (ins FPRegs:$src),
"fitod $src, $dst",
[(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOQ : F3_3u<2, 0b110100, 0b011001100,
+ (outs QFPRegs:$dst), (ins FPRegs:$src),
+ "fitoq $src, $dst",
+ [(set QFPRegs:$dst, (SPitof FPRegs:$src))]>,
+ Requires<[HasHardQuad]>;
// Convert Floating-point to Integer Instructions, p. 142
-def FSTOI : F3_3<2, 0b110100, 0b011010001,
+def FSTOI : F3_3u<2, 0b110100, 0b011010001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fstoi $src, $dst",
[(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
-def FDTOI : F3_3<2, 0b110100, 0b011010010,
+def FDTOI : F3_3u<2, 0b110100, 0b011010010,
(outs FPRegs:$dst), (ins DFPRegs:$src),
"fdtoi $src, $dst",
[(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+def FQTOI : F3_3u<2, 0b110100, 0b011010011,
+ (outs FPRegs:$dst), (ins QFPRegs:$src),
+ "fqtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi QFPRegs:$src))]>,
+ Requires<[HasHardQuad]>;
// Convert between Floating-point Formats Instructions, p. 143
-def FSTOD : F3_3<2, 0b110100, 0b011001001,
+def FSTOD : F3_3u<2, 0b110100, 0b011001001,
(outs DFPRegs:$dst), (ins FPRegs:$src),
"fstod $src, $dst",
[(set f64:$dst, (fextend f32:$src))]>;
-def FDTOS : F3_3<2, 0b110100, 0b011000110,
+def FSTOQ : F3_3u<2, 0b110100, 0b011001101,
+ (outs QFPRegs:$dst), (ins FPRegs:$src),
+ "fstoq $src, $dst",
+ [(set f128:$dst, (fextend f32:$src))]>,
+ Requires<[HasHardQuad]>;
+def FDTOS : F3_3u<2, 0b110100, 0b011000110,
(outs FPRegs:$dst), (ins DFPRegs:$src),
"fdtos $src, $dst",
[(set f32:$dst, (fround f64:$src))]>;
+def FDTOQ : F3_3u<2, 0b110100, 0b011001110,
+ (outs QFPRegs:$dst), (ins DFPRegs:$src),
+ "fdtoq $src, $dst",
+ [(set f128:$dst, (fextend f64:$src))]>,
+ Requires<[HasHardQuad]>;
+def FQTOS : F3_3u<2, 0b110100, 0b011000111,
+ (outs FPRegs:$dst), (ins QFPRegs:$src),
+ "fqtos $src, $dst",
+ [(set f32:$dst, (fround f128:$src))]>,
+ Requires<[HasHardQuad]>;
+def FQTOD : F3_3u<2, 0b110100, 0b011001011,
+ (outs DFPRegs:$dst), (ins QFPRegs:$src),
+ "fqtod $src, $dst",
+ [(set f64:$dst, (fround f128:$src))]>,
+ Requires<[HasHardQuad]>;
// Floating-point Move Instructions, p. 144
-def FMOVS : F3_3<2, 0b110100, 0b000000001,
+def FMOVS : F3_3u<2, 0b110100, 0b000000001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fmovs $src, $dst", []>;
-def FNEGS : F3_3<2, 0b110100, 0b000000101,
+def FNEGS : F3_3u<2, 0b110100, 0b000000101,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fnegs $src, $dst",
[(set f32:$dst, (fneg f32:$src))]>;
-def FABSS : F3_3<2, 0b110100, 0b000001001,
+def FABSS : F3_3u<2, 0b110100, 0b000001001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fabss $src, $dst",
[(set f32:$dst, (fabs f32:$src))]>;
// Floating-point Square Root Instructions, p.145
-def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+def FSQRTS : F3_3u<2, 0b110100, 0b000101001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fsqrts $src, $dst",
[(set f32:$dst, (fsqrt f32:$src))]>;
-def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+def FSQRTD : F3_3u<2, 0b110100, 0b000101010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fsqrtd $src, $dst",
[(set f64:$dst, (fsqrt f64:$src))]>;
+def FSQRTQ : F3_3u<2, 0b110100, 0b000101011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src),
+ "fsqrtq $src, $dst",
+ [(set f128:$dst, (fsqrt f128:$src))]>,
+ Requires<[HasHardQuad]>;
@@ -658,6 +749,12 @@ def FADDD : F3_3<2, 0b110100, 0b001000010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"faddd $src1, $src2, $dst",
[(set f64:$dst, (fadd f64:$src1, f64:$src2))]>;
+def FADDQ : F3_3<2, 0b110100, 0b001000011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "faddq $src1, $src2, $dst",
+ [(set f128:$dst, (fadd f128:$src1, f128:$src2))]>,
+ Requires<[HasHardQuad]>;
+
def FSUBS : F3_3<2, 0b110100, 0b001000101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsubs $src1, $src2, $dst",
@@ -666,6 +763,12 @@ def FSUBD : F3_3<2, 0b110100, 0b001000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fsubd $src1, $src2, $dst",
[(set f64:$dst, (fsub f64:$src1, f64:$src2))]>;
+def FSUBQ : F3_3<2, 0b110100, 0b001000111,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "fsubq $src1, $src2, $dst",
+ [(set f128:$dst, (fsub f128:$src1, f128:$src2))]>,
+ Requires<[HasHardQuad]>;
+
// Floating-point Multiply and Divide Instructions, p. 147
def FMULS : F3_3<2, 0b110100, 0b001001001,
@@ -676,11 +779,24 @@ def FMULD : F3_3<2, 0b110100, 0b001001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fmuld $src1, $src2, $dst",
[(set f64:$dst, (fmul f64:$src1, f64:$src2))]>;
+def FMULQ : F3_3<2, 0b110100, 0b001001011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "fmulq $src1, $src2, $dst",
+ [(set f128:$dst, (fmul f128:$src1, f128:$src2))]>,
+ Requires<[HasHardQuad]>;
+
def FSMULD : F3_3<2, 0b110100, 0b001101001,
(outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsmuld $src1, $src2, $dst",
[(set f64:$dst, (fmul (fextend f32:$src1),
(fextend f32:$src2)))]>;
+def FDMULQ : F3_3<2, 0b110100, 0b001101110,
+ (outs QFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fdmulq $src1, $src2, $dst",
+ [(set f128:$dst, (fmul (fextend f64:$src1),
+ (fextend f64:$src2)))]>,
+ Requires<[HasHardQuad]>;
+
def FDIVS : F3_3<2, 0b110100, 0b001001101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fdivs $src1, $src2, $dst",
@@ -689,21 +805,61 @@ def FDIVD : F3_3<2, 0b110100, 0b001001110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fdivd $src1, $src2, $dst",
[(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>;
+def FDIVQ : F3_3<2, 0b110100, 0b001001111,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "fdivq $src1, $src2, $dst",
+ [(set f128:$dst, (fdiv f128:$src1, f128:$src2))]>,
+ Requires<[HasHardQuad]>;
// Floating-point Compare Instructions, p. 148
// Note: the 2nd template arg is different for these guys.
// Note 2: the result of a FCMP is not available until the 2nd cycle
-// after the instr is retired, but there is no interlock. This behavior
-// is modelled with a forced noop after the instruction.
+// after the instr is retired, but there is no interlock in Sparc V8.
+// This behavior is modeled with a forced noop after the instruction in
+// DelaySlotFiller.
+
let Defs = [FCC] in {
- def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ def FCMPS : F3_3c<2, 0b110101, 0b001010001,
(outs), (ins FPRegs:$src1, FPRegs:$src2),
- "fcmps $src1, $src2\n\tnop",
+ "fcmps $src1, $src2",
[(SPcmpfcc f32:$src1, f32:$src2)]>;
- def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ def FCMPD : F3_3c<2, 0b110101, 0b001010010,
(outs), (ins DFPRegs:$src1, DFPRegs:$src2),
- "fcmpd $src1, $src2\n\tnop",
+ "fcmpd $src1, $src2",
[(SPcmpfcc f64:$src1, f64:$src2)]>;
+ def FCMPQ : F3_3c<2, 0b110101, 0b001010011,
+ (outs), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "fcmpq $src1, $src2",
+ [(SPcmpfcc f128:$src1, f128:$src2)]>,
+ Requires<[HasHardQuad]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions for Thread Local Storage (TLS).
+//===----------------------------------------------------------------------===//
+
+def TLS_ADDrr : F3_1<2, 0b000000,
+ (outs IntRegs:$rd),
+ (ins IntRegs:$rs1, IntRegs:$rs2, TLSSym:$sym),
+ "add $rs1, $rs2, $rd, $sym",
+ [(set i32:$rd,
+ (tlsadd i32:$rs1, i32:$rs2, tglobaltlsaddr:$sym))]>;
+
+let mayLoad = 1 in
+ def TLS_LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
+ "ld [$addr], $dst, $sym",
+ [(set i32:$dst,
+ (tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>;
+
+let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
+ def TLS_CALL : InstSP<(outs),
+ (ins calltarget:$disp, TLSSym:$sym, variable_ops),
+ "call $disp, $sym",
+ [(tlscall texternalsym:$disp, tglobaltlsaddr:$sym)]> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
}
//===----------------------------------------------------------------------===//
@@ -713,73 +869,108 @@ let Defs = [FCC] in {
// V9 Conditional Moves.
let Predicates = [HasV9], Constraints = "$f = $rd" in {
// Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
- // FIXME: Add instruction encodings for the JIT some day.
- let Uses = [ICC] in {
+ let Uses = [ICC], cc = 0b100 in {
def MOVICCrr
- : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc),
- "mov$cc %icc, $rs2, $rd",
- [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cc))]>;
+ : F4_1<0b101100, (outs IntRegs:$rd),
+ (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+ "mov$cond %icc, $rs2, $rd",
+ [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cond))]>;
+
def MOVICCri
- : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc),
- "mov$cc %icc, $i, $rd",
- [(set i32:$rd, (SPselecticc simm11:$i, i32:$f, imm:$cc))]>;
+ : F4_2<0b101100, (outs IntRegs:$rd),
+ (ins i32imm:$simm11, IntRegs:$f, CCOp:$cond),
+ "mov$cond %icc, $simm11, $rd",
+ [(set i32:$rd,
+ (SPselecticc simm11:$simm11, i32:$f, imm:$cond))]>;
}
- let Uses = [FCC] in {
+ let Uses = [FCC], cc = 0b000 in {
def MOVFCCrr
- : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc),
- "mov$cc %fcc0, $rs2, $rd",
- [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, imm:$cc))]>;
+ : F4_1<0b101100, (outs IntRegs:$rd),
+ (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+ "mov$cond %fcc0, $rs2, $rd",
+ [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, imm:$cond))]>;
def MOVFCCri
- : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc),
- "mov$cc %fcc0, $i, $rd",
- [(set i32:$rd, (SPselectfcc simm11:$i, i32:$f, imm:$cc))]>;
+ : F4_2<0b101100, (outs IntRegs:$rd),
+ (ins i32imm:$simm11, IntRegs:$f, CCOp:$cond),
+ "mov$cond %fcc0, $simm11, $rd",
+ [(set i32:$rd,
+ (SPselectfcc simm11:$simm11, i32:$f, imm:$cond))]>;
}
- let Uses = [ICC] in {
+ let Uses = [ICC], opf_cc = 0b100 in {
def FMOVS_ICC
- : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc),
- "fmovs$cc %icc, $rs2, $rd",
- [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cc))]>;
+ : F4_3<0b110101, 0b000001, (outs FPRegs:$rd),
+ (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond),
+ "fmovs$cond %icc, $rs2, $rd",
+ [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cond))]>;
def FMOVD_ICC
- : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc),
- "fmovd$cc %icc, $rs2, $rd",
- [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cc))]>;
+ : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd),
+ (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond),
+ "fmovd$cond %icc, $rs2, $rd",
+ [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cond))]>;
+ def FMOVQ_ICC
+ : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
+ (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond),
+                   "fmovq$cond %icc, $rs2, $rd",
+ [(set f128:$rd, (SPselecticc f128:$rs2, f128:$f, imm:$cond))]>;
}
- let Uses = [FCC] in {
+ let Uses = [FCC], opf_cc = 0b000 in {
def FMOVS_FCC
- : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc),
- "fmovs$cc %fcc0, $rs2, $rd",
- [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cc))]>;
+ : F4_3<0b110101, 0b000001, (outs FPRegs:$rd),
+ (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond),
+ "fmovs$cond %fcc0, $rs2, $rd",
+ [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cond))]>;
def FMOVD_FCC
- : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc),
- "fmovd$cc %fcc0, $rs2, $rd",
- [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cc))]>;
+ : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd),
+ (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond),
+ "fmovd$cond %fcc0, $rs2, $rd",
+ [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cond))]>;
+ def FMOVQ_FCC
+ : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
+ (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond),
+                   "fmovq$cond %fcc0, $rs2, $rd",
+ [(set f128:$rd, (SPselectfcc f128:$rs2, f128:$f, imm:$cond))]>;
}
}
// Floating-Point Move Instructions, p. 164 of the V9 manual.
let Predicates = [HasV9] in {
- def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ def FMOVD : F3_3u<2, 0b110100, 0b000000010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fmovd $src, $dst", []>;
- def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ def FMOVQ : F3_3u<2, 0b110100, 0b000000011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src),
+ "fmovq $src, $dst", []>,
+ Requires<[HasHardQuad]>;
+ def FNEGD : F3_3u<2, 0b110100, 0b000000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fnegd $src, $dst",
[(set f64:$dst, (fneg f64:$src))]>;
- def FABSD : F3_3<2, 0b110100, 0b000001010,
+ def FNEGQ : F3_3u<2, 0b110100, 0b000000111,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src),
+ "fnegq $src, $dst",
+ [(set f128:$dst, (fneg f128:$src))]>,
+ Requires<[HasHardQuad]>;
+ def FABSD : F3_3u<2, 0b110100, 0b000001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fabsd $src, $dst",
[(set f64:$dst, (fabs f64:$src))]>;
+ def FABSQ : F3_3u<2, 0b110100, 0b000001011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src),
+ "fabsq $src, $dst",
+ [(set f128:$dst, (fabs f128:$src))]>,
+ Requires<[HasHardQuad]>;
}
// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
// the top 32-bits before using it. To do this clearing, we use a SLLri X,0.
-def POPCrr : F3_1<2, 0b101110,
- (outs IntRegs:$dst), (ins IntRegs:$src),
- "popc $src, $dst", []>, Requires<[HasV9]>;
+let rs1 = 0 in
+ def POPCrr : F3_1<2, 0b101110,
+ (outs IntRegs:$dst), (ins IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
def : Pat<(ctpop i32:$src),
(POPCrr (SLLri $src, 0))>;
@@ -801,6 +992,14 @@ def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>;
def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
+// GlobalTLS addresses
+def : Pat<(SPhi tglobaltlsaddr:$in), (SETHIi tglobaltlsaddr:$in)>;
+def : Pat<(SPlo tglobaltlsaddr:$in), (ORri (i32 G0), tglobaltlsaddr:$in)>;
+def : Pat<(add (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
+ (ADDri (SETHIi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+def : Pat<(xor (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
+ (XORri (SETHIi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+
// Blockaddress
def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>;
def : Pat<(SPlo tblockaddress:$in), (ORri (i32 G0), tblockaddress:$in)>;
diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp
new file mode 100644
index 0000000..6493c7d
--- /dev/null
+++ b/lib/Target/Sparc/SparcJITInfo.cpp
@@ -0,0 +1,165 @@
+//===-- SparcJITInfo.cpp - Implement the Sparc JIT Interface --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Sparc target.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "jit"
+#include "SparcJITInfo.h"
+#include "SparcRelocations.h"
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Memory.h"
+
+using namespace llvm;
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+extern "C" void SparcCompilationCallback();
+
+extern "C" {
+#if defined (__sparc__)
+ asm(
+ ".text\n"
+ "\t.align 4\n"
+ "\t.global SparcCompilationCallback\n"
+ "\t.type SparcCompilationCallback, #function\n"
+ "SparcCompilationCallback:\n"
+ // Save current register window.
+ "\tsave %sp, -192, %sp\n"
+ // stubaddr+4 is in %g1.
+ "\tcall SparcCompilationCallbackC\n"
+ "\t sub %g1, 4, %o0\n"
+ // restore original register window and
+ // copy %o0 to %g1
+ "\t restore %o0, 0, %g1\n"
+ // call the new stub
+ "\tjmp %g1\n"
+ "\t nop\n"
+ "\t.size SparcCompilationCallback, .-SparcCompilationCallback"
+ );
+
+#else
+ void SparcCompilationCallback() {
+ llvm_unreachable(
+ "Cannot call SparcCompilationCallback() on a non-sparc arch!");
+ }
+#endif
+}
+
+#define HI(Val) (((unsigned)(Val)) >> 10)
+#define LO(Val) (((unsigned)(Val)) & 0x3FF)
+
+#define SETHI_INST(imm, rd) (0x01000000 | ((rd) << 25) | ((imm) & 0x3FFFFF))
+#define JMP_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x38 << 19) \
+ | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
+#define NOP_INST SETHI_INST(0, 0)
+
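As a rough, hand-checked illustration (not part of the patch itself) of what these macros encode, a stub pointed at the hypothetical address 0x12345678 would consist of the three words below:

    static void exampleStubEncoding() {
      const unsigned Target = 0x12345678;
      unsigned W0 = SETHI_INST(HI(Target), 1);  // 0x03048D15  sethi %hi(0x12345678), %g1
      unsigned W1 = JMP_INST(1, LO(Target), 0); // 0x81C06278  jmp %g1+0x278
      unsigned W2 = NOP_INST;                   // 0x01000000  nop (sethi 0, %g0)
      (void)W0; (void)W1; (void)W2;             // values computed by hand, illustrative only
    }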
+extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
+
+ // Rewrite the function stub so that we don't end up here every time we
+ // execute the call. We're replacing the first three instructions of the
+ // stub with code that jumps to the compiled function:
+ // sethi %hi(NewVal), %g1
+ // jmp %g1+%lo(NewVal)
+ // nop
+
+ *(intptr_t *)(StubAddr) = SETHI_INST(HI(NewVal), 1);
+ *(intptr_t *)(StubAddr + 4) = JMP_INST(1, LO(NewVal), 0);
+ *(intptr_t *)(StubAddr + 8) = NOP_INST;
+
+ sys::Memory::InvalidateInstructionCache((void*) StubAddr, 12);
+ return (void*)StubAddr;
+}
+
+void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ assert(0 && "FIXME: Implement SparcJITInfo::replaceMachineCodeForFunction");
+}
+
+
+TargetJITInfo::StubLayout SparcJITInfo::getStubLayout() {
+  // The stub contains three 4-byte instructions, aligned to 4 bytes. See
+ // emitFunctionStub for details.
+
+ StubLayout Result = { 3*4, 4 };
+ return Result;
+}
+
+void *SparcJITInfo::emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE)
+{
+ JCE.emitAlignment(4);
+ void *Addr = (void*) (JCE.getCurrentPCValue());
+ if (!sys::Memory::setRangeWritable(Addr, 12))
+ llvm_unreachable("ERROR: Unable to mark stub writable.");
+
+ intptr_t EmittedAddr;
+ if (Fn != (void*)(intptr_t)SparcCompilationCallback)
+ EmittedAddr = (intptr_t)Fn;
+ else
+ EmittedAddr = (intptr_t)SparcCompilationCallback;
+
+ // sethi %hi(EmittedAddr), %g1
+ // jmp %g1+%lo(EmittedAddr), %g1
+ // nop
+
+ JCE.emitWordBE(SETHI_INST(HI(EmittedAddr), 1));
+ JCE.emitWordBE(JMP_INST(1, LO(EmittedAddr), 1));
+ JCE.emitWordBE(NOP_INST);
+
+ sys::Memory::InvalidateInstructionCache(Addr, 12);
+ if (!sys::Memory::setRangeExecutable(Addr, 12))
+ llvm_unreachable("ERROR: Unable to mark stub executable.");
+
+ return Addr;
+}
+
+TargetJITInfo::LazyResolverFn
+SparcJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return SparcCompilationCallback;
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void SparcJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char *GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
+
+ switch ((SP::RelocationType) MR->getRelocationType()) {
+ case SP::reloc_sparc_hi:
+ ResultPtr = (ResultPtr >> 10) & 0x3fffff;
+ break;
+
+ case SP::reloc_sparc_lo:
+ ResultPtr = (ResultPtr & 0x3ff);
+ break;
+
+ case SP::reloc_sparc_pc30:
+ ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffffff;
+ break;
+
+ case SP::reloc_sparc_pc22:
+ ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffff;
+ break;
+
+ case SP::reloc_sparc_pc19:
+ ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x7ffff;
+ break;
+ }
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ }
+}
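A small hand-worked example of the pc-relative cases above (illustrative only, not from the patch): a 22-bit branch at address 0x2000 whose target is 0x1000 is patched as follows.

    // (0x1000 - 0x2000) >> 2 == -1024 instruction words; masked to 22 bits
    // this is 0x3FFC00, which is then OR'd into the branch word.
    intptr_t Disp = ((intptr_t)0x1000 - (intptr_t)0x2000) >> 2;
    unsigned Field = (unsigned)Disp & 0x3fffff;   // 0x3FFC00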
diff --git a/lib/Target/Sparc/SparcJITInfo.h b/lib/Target/Sparc/SparcJITInfo.h
new file mode 100644
index 0000000..9c6e488
--- /dev/null
+++ b/lib/Target/Sparc/SparcJITInfo.h
@@ -0,0 +1,67 @@
+//==- SparcJITInfo.h - Sparc Implementation of the JIT Interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCJITINFO_H
+#define SPARCJITINFO_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+class SparcTargetMachine;
+
+class SparcJITInfo : public TargetJITInfo {
+
+ bool IsPIC;
+
+ public:
+ explicit SparcJITInfo()
+ : IsPIC(false) {}
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on Sparc.
+ virtual StubLayout getStubLayout();
+
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char *GOTBase);
+
+ /// Initialize - Initialize internal stage for the function being JITted.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ IsPIC = isPIC;
+ }
+
+};
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index dc97f06..c98613a 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -40,8 +40,17 @@ SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st)
const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
- static const uint16_t CalleeSavedRegs[] = { 0 };
- return CalleeSavedRegs;
+ return CSR_SaveList;
+}
+
+const uint32_t*
+SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ return CSR_RegMask;
+}
+
+const uint32_t*
+SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const {
+ return RTCSR_RegMask;
}
BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -65,6 +74,15 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(SP::G0);
Reserved.set(SP::G6);
Reserved.set(SP::G7);
+
+ // Unaliased double registers are not available in non-V9 targets.
+ if (!Subtarget.isV9()) {
+ for (unsigned n = 0; n != 16; ++n) {
+ for (MCRegAliasIterator AI(SP::D16 + n, this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
+ }
+ }
+
return Reserved;
}
@@ -74,6 +92,62 @@ SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
}
+static void replaceFI(MachineFunction &MF,
+ MachineBasicBlock::iterator II,
+ MachineInstr &MI,
+ DebugLoc dl,
+ unsigned FIOperandNum, int Offset,
+ unsigned FramePtr)
+{
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // FIXME: it would be better to scavenge a register here instead of
+ // reserving G1 all of the time.
+ if (Offset >= 0) {
+    // Emit nonnegative immediates with sethi + or.
+ // sethi %hi(Offset), %g1
+ // add %g1, %fp, %g1
+ // Insert G1+%lo(offset) into the user.
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
+ .addImm(HI22(Offset));
+
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(FramePtr);
+ // Insert: G1+%lo(offset) into the user.
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(LO10(Offset));
+ return;
+ }
+
+  // Emit negative immediates with sethi + xor.
+ // sethi %hix(Offset), %g1
+ // xor %g1, %lox(offset), %g1
+ // add %g1, %fp, %g1
+ // Insert: G1 + 0 into the user.
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
+ .addImm(HIX22(Offset));
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::XORri), SP::G1)
+ .addReg(SP::G1).addImm(LOX10(Offset));
+
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(FramePtr);
+ // Insert: G1+%lo(offset) into the user.
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+}
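For a concrete feel of the split above (illustrative only; HI22/LO10 are assumed to keep their usual meaning of the top 22 / low 10 bits of the value):

    int Offset = 8192;                       // too large for the 13-bit immediate
    unsigned Hi = (unsigned)Offset >> 10;    // 8 -> sethi 8, %g1
    unsigned Lo = (unsigned)Offset & 0x3ff;  // 0 -> user operand becomes [%g1 + 0]
    // the ADDrr in between folds the frame pointer back into %g1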
+
+
void
SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
@@ -98,35 +172,40 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ;
}
- // Replace frame index with a frame pointer reference.
- if (Offset >= -4096 && Offset <= 4095) {
- // If the offset is small enough to fit in the immediate field, directly
- // encode it.
- MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
- } else {
- // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
- // scavenge a register here instead of reserving G1 all of the time.
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- unsigned OffHi = (unsigned)Offset >> 10U;
- BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
- // Emit G1 = G1 + I6
- BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
- .addReg(FramePtr);
- // Insert: G1+%lo(offset) into the user.
- MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
+ if (MI.getOpcode() == SP::STQFri) {
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
+ unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
+ MachineInstr *StMI =
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
+ .addReg(FramePtr).addImm(0).addReg(SrcEvenReg);
+ replaceFI(MF, II, *StMI, dl, 0, Offset, FramePtr);
+ MI.setDesc(TII.get(SP::STDFri));
+ MI.getOperand(2).setReg(SrcOddReg);
+ Offset += 8;
+ } else if (MI.getOpcode() == SP::LDQFri) {
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
+ unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
+        MachineInstr *LdMI =
+          BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
+          .addReg(FramePtr).addImm(0);
+        replaceFI(MF, II, *LdMI, dl, 1, Offset, FramePtr);
+
+ MI.setDesc(TII.get(SP::LDDFri));
+ MI.getOperand(0).setReg(DestOddReg);
+ Offset += 8;
+ }
}
+
+ replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FramePtr);
+
}
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
-unsigned SparcRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned SparcRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 6b77d4e..00b5a98 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -32,6 +32,9 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t* getCallPreservedMask(CallingConv::ID CC) const;
+
+ const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -47,10 +50,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index a59c442..2a575c0 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -11,8 +11,8 @@
// Declarations that describe the Sparc register file
//===----------------------------------------------------------------------===//
-class SparcReg<string n> : Register<n> {
- field bits<5> Num;
+class SparcReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "SP";
}
@@ -23,25 +23,31 @@ class SparcCtrlReg<string n>: Register<n> {
let Namespace = "SP" in {
def sub_even : SubRegIndex<32>;
def sub_odd : SubRegIndex<32, 32>;
+def sub_even64 : SubRegIndex<64>;
+def sub_odd64 : SubRegIndex<64, 64>;
}
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers
-class Ri<bits<5> num, string n> : SparcReg<n> {
- let Num = num;
-}
+class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>;
+
// Rf - 32-bit floating-point registers
-class Rf<bits<5> num, string n> : SparcReg<n> {
- let Num = num;
-}
+class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>;
+
// Rd - Slots in the FP register file for 64-bit floating-point values.
-class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
- let Num = num;
+class Rd<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
let SubRegs = subregs;
let SubRegIndices = [sub_even, sub_odd];
let CoveredBySubRegs = 1;
}
+// Rq - Slots in the FP register file for 128-bit floating-point values.
+class Rq<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even64, sub_odd64];
+ let CoveredBySubRegs = 1;
+}
+
// Control Registers
def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code.
def FCC : SparcCtrlReg<"FCC">;
@@ -135,6 +141,43 @@ def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[85]>;
def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[86]>;
def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
+// Unaliased double precision floating point registers.
+// FIXME: Define DwarfRegNum for these registers.
+def D16 : SparcReg< 1, "F32">;
+def D17 : SparcReg< 3, "F34">;
+def D18 : SparcReg< 5, "F36">;
+def D19 : SparcReg< 7, "F38">;
+def D20 : SparcReg< 9, "F40">;
+def D21 : SparcReg<11, "F42">;
+def D22 : SparcReg<13, "F44">;
+def D23 : SparcReg<15, "F46">;
+def D24 : SparcReg<17, "F48">;
+def D25 : SparcReg<19, "F50">;
+def D26 : SparcReg<21, "F52">;
+def D27 : SparcReg<23, "F54">;
+def D28 : SparcReg<25, "F56">;
+def D29 : SparcReg<27, "F58">;
+def D30 : SparcReg<29, "F60">;
+def D31 : SparcReg<31, "F62">;
+
+// Aliases of the F* registers used to hold 128-bit floating-point values
+// (long doubles).
+def Q0 : Rq< 0, "F0", [D0, D1]>;
+def Q1 : Rq< 4, "F4", [D2, D3]>;
+def Q2 : Rq< 8, "F8", [D4, D5]>;
+def Q3 : Rq<12, "F12", [D6, D7]>;
+def Q4 : Rq<16, "F16", [D8, D9]>;
+def Q5 : Rq<20, "F20", [D10, D11]>;
+def Q6 : Rq<24, "F24", [D12, D13]>;
+def Q7 : Rq<28, "F28", [D14, D15]>;
+def Q8 : Rq< 1, "F32", [D16, D17]>;
+def Q9 : Rq< 5, "F36", [D18, D19]>;
+def Q10 : Rq< 9, "F40", [D20, D21]>;
+def Q11 : Rq<13, "F44", [D22, D23]>;
+def Q12 : Rq<17, "F48", [D24, D25]>;
+def Q13 : Rq<21, "F52", [D26, D27]>;
+def Q14 : Rq<25, "F56", [D28, D29]>;
+def Q15 : Rq<29, "F60", [D30, D31]>;
+
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
@@ -158,4 +201,6 @@ def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>;
// Floating point register classes.
def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
-def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
+def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 31)>;
+
+def QFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 15)>;
diff --git a/lib/Target/Sparc/SparcRelocations.h b/lib/Target/Sparc/SparcRelocations.h
new file mode 100644
index 0000000..388cfe7
--- /dev/null
+++ b/lib/Target/Sparc/SparcRelocations.h
@@ -0,0 +1,41 @@
+//===-- SparcRelocations.h - Sparc Code Relocations -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Sparc target-specific relocation types
+// (for relocation-model=static).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_RELOCATIONS_H
+#define SPARC_RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace SP {
+ enum RelocationType {
+ // reloc_sparc_hi - upper 22 bits
+ reloc_sparc_hi = 1,
+
+ // reloc_sparc_lo - lower 10 bits
+ reloc_sparc_lo = 2,
+
+ // reloc_sparc_pc30 - pc rel. 30 bits for call
+ reloc_sparc_pc30 = 3,
+
+ // reloc_sparc_pc22 - pc rel. 22 bits for branch
+ reloc_sparc_pc22 = 4,
+
+      // reloc_sparc_pc19 - pc rel. 19 bits for branch with icc/xcc
+ reloc_sparc_pc19 = 5
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index f9ce098..7d09d0e 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -30,7 +30,8 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
IsV9(false),
V8DeprecatedInsts(false),
IsVIS(false),
- Is64Bit(is64Bit) {
+ Is64Bit(is64Bit),
+ HasHardQuad(false) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 2bf599d..0f81cc9 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -29,6 +29,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
bool V8DeprecatedInsts;
bool IsVIS;
bool Is64Bit;
+ bool HasHardQuad;
public:
SparcSubtarget(const std::string &TT, const std::string &CPU,
@@ -37,6 +38,7 @@ public:
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+ bool hasHardQuad() const { return HasHardQuad; }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index a7355f4..0f93674 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -65,6 +65,13 @@ bool SparcPassConfig::addInstSelector() {
return false;
}
+bool SparcTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for Sparc.
+ PM.add(createSparcJITCodeEmitterPass(*this, JCE));
+ return false;
+}
+
/// addPreEmitPass - This pass may be implemented by targets that want to run
/// passes immediately before machine code is emitted. This should return
/// true if -print-machineinstrs should print out the code after the passes.
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 081075d..8c9bcd3 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -17,6 +17,7 @@
#include "SparcFrameLowering.h"
#include "SparcISelLowering.h"
#include "SparcInstrInfo.h"
+#include "SparcJITInfo.h"
#include "SparcSelectionDAGInfo.h"
#include "SparcSubtarget.h"
#include "llvm/IR/DataLayout.h"
@@ -32,6 +33,7 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
+ SparcJITInfo JITInfo;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -52,10 +54,14 @@ public:
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
+ virtual SparcJITInfo *getJITInfo() {
+ return &JITInfo;
+ }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
};
/// SparcV8TargetMachine - Sparc 32-bit target machine
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index bb71463..4eea163 100644
--- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -15,7 +15,9 @@ using namespace llvm;
Target llvm::TheSparcTarget;
Target llvm::TheSparcV9Target;
-extern "C" void LLVMInitializeSparcTargetInfo() {
- RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc");
- RegisterTarget<Triple::sparcv9> Y(TheSparcV9Target, "sparcv9", "Sparc V9");
+extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget<Triple::sparc, /*HasJIT=*/ true>
+ X(TheSparcTarget, "sparc", "Sparc");
+ RegisterTarget<Triple::sparcv9, /*HasJIT=*/ true>
+ Y(TheSparcV9Target, "sparcv9", "Sparc V9");
}
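With HasJIT now set for both triples, the old JIT can in principle be asked for a Sparc engine. A minimal sketch, assuming the usual LLVM 3.x EngineBuilder API (the module and error handling here are placeholders, not taken from the patch):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/JIT.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/TargetSelect.h"

    llvm::ExecutionEngine *makeJIT(llvm::Module *M) {
      llvm::InitializeAllTargets();      // picks up the registrations above
      llvm::InitializeAllTargetMCs();
      std::string Err;
      return llvm::EngineBuilder(M).setErrorStr(&Err)
                                   .setEngineKind(llvm::EngineKind::JIT)
                                   .create();
    }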
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 58af2c4..763f40c 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -32,6 +32,7 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
namespace {
enum RegisterKind {
GR32Reg,
+ GRH32Reg,
GR64Reg,
GR128Reg,
ADDR32Reg,
@@ -262,6 +263,8 @@ public:
// Used by the TableGen code to check for particular operand types.
bool isGR32() const { return isReg(GR32Reg); }
+ bool isGRH32() const { return isReg(GRH32Reg); }
+ bool isGRX32() const { return false; }
bool isGR64() const { return isReg(GR64Reg); }
bool isGR128() const { return isReg(GR128Reg); }
bool isADDR32() const { return isReg(ADDR32Reg); }
@@ -327,8 +330,9 @@ private:
StringRef Mnemonic);
public:
- SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
MCAsmParserExtension::Initialize(Parser);
// Initialize the set of available features.
@@ -355,6 +359,14 @@ public:
return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
}
OperandMatchResultTy
+ parseGRH32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
+ }
+ OperandMatchResultTy
+ parseGRX32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ llvm_unreachable("GRX32 should only be used for pseudo instructions");
+ }
+ OperandMatchResultTy
parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index ab657f6..d21c0a8 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -21,9 +21,11 @@ add_llvm_target(SystemZCodeGen
SystemZISelLowering.cpp
SystemZInstrInfo.cpp
SystemZLongBranch.cpp
+ SystemZMachineFunctionInfo.cpp
SystemZMCInstLower.cpp
SystemZRegisterInfo.cpp
SystemZSelectionDAGInfo.cpp
+ SystemZShortenInst.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
)
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 79469b6..fc3c38d 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -48,14 +48,11 @@ extern "C" void LLVMInitializeSystemZDisassembler() {
}
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned *Regs,
- bool isAddress = false) {
+ const unsigned *Regs) {
assert(RegNo < 16 && "Invalid register");
- if (!isAddress || RegNo) {
- RegNo = Regs[RegNo];
- if (RegNo == 0)
- return MCDisassembler::Fail;
- }
+ RegNo = Regs[RegNo];
+ if (RegNo == 0)
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateReg(RegNo));
return MCDisassembler::Success;
}
@@ -66,6 +63,12 @@ static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs);
}
+static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs);
+}
+
static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
@@ -81,7 +84,7 @@ static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, true);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
}
static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 37ebff3..e1e64d3 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -124,18 +124,6 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
O << *MO.getExpr();
}
-void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- if (MO.isImm()) {
- O << "0x";
- O.write_hex(MO.getImm());
- } else {
- O << *MO.getExpr();
- O << "@PLT";
- }
-}
-
void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
printOperand(MI->getOperand(OpNum), O);
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 30cdee5..734ecf0 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -58,7 +58,6 @@ private:
void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
- void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
// Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 027db44..26a8fae 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -35,15 +35,6 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
llvm_unreachable("Unknown fixup kind!");
}
-// If Opcode is a relaxable interprocedural reference, return the relaxed form,
-// otherwise return 0.
-static unsigned getRelaxedOpcode(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::BRAS: return SystemZ::BRASL;
- }
- return 0;
-}
-
namespace {
class SystemZMCAsmBackend : public MCAsmBackend {
uint8_t OSABI;
@@ -59,14 +50,20 @@ public:
LLVM_OVERRIDE;
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const LLVM_OVERRIDE;
- virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE;
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE {
+ return false;
+ }
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *Fragment,
const MCAsmLayout &Layout) const
- LLVM_OVERRIDE;
+ LLVM_OVERRIDE {
+ return false;
+ }
virtual void relaxInstruction(const MCInst &Inst,
- MCInst &Res) const LLVM_OVERRIDE;
+ MCInst &Res) const LLVM_OVERRIDE {
+ llvm_unreachable("SystemZ does do not have assembler relaxation");
+ }
virtual bool writeNopData(uint64_t Count,
MCObjectWriter *OW) const LLVM_OVERRIDE;
virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const
@@ -114,28 +111,6 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
-bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
- return getRelaxedOpcode(Inst.getOpcode()) != 0;
-}
-
-bool
-SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value,
- const MCRelaxableFragment *Fragment,
- const MCAsmLayout &Layout) const {
- // At the moment we just need to relax 16-bit fields to wider fields.
- Value = extractBitsForFixup(Fixup.getKind(), Value);
- return (int16_t)Value != (int64_t)Value;
-}
-
-void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst,
- MCInst &Res) const {
- unsigned Opcode = getRelaxedOpcode(Inst.getOpcode());
- assert(Opcode && "Unexpected insn to relax");
- Res = Inst;
- Res.setOpcode(Opcode);
-}
-
bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
MCObjectWriter *OW) const {
for (uint64_t I = 0; I != Count; ++I)
@@ -143,8 +118,9 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
return true;
}
-MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT,
- StringRef CPU) {
+MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
return new SystemZMCAsmBackend(OSABI);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 9e27aa0..965c41e 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -19,10 +19,8 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) {
IsLittleEndian = false;
CommentString = "#";
- PCSymbol = ".";
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
ZeroDirective = "\t.space\t";
Data64bitsDirective = "\t.quad\t";
UsesELFSectionDirectiveForBSS = true;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index d440787..b9ac92a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -10,13 +10,13 @@
#ifndef SystemZTARGETASMINFO_H
#define SystemZTARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
class StringRef;
-class SystemZMCAsmInfo : public MCAsmInfo {
+class SystemZMCAsmInfo : public MCAsmInfoELF {
public:
explicit SystemZMCAsmInfo(StringRef TT);
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index bda7714..f07ea7b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -79,14 +79,6 @@ private:
SmallVectorImpl<MCFixup> &Fixups) const {
return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2);
}
- uint64_t getPLT16DBLEncoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2);
- }
- uint64_t getPLT32DBLEncoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2);
- }
};
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 3653192..9e1296b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -28,10 +28,17 @@
using namespace llvm;
const unsigned SystemZMC::GR32Regs[16] = {
- SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W,
- SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W,
- SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W,
- SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W
+ SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L,
+ SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L,
+ SystemZ::R8L, SystemZ::R9L, SystemZ::R10L, SystemZ::R11L,
+ SystemZ::R12L, SystemZ::R13L, SystemZ::R14L, SystemZ::R15L
+};
+
+const unsigned SystemZMC::GRH32Regs[16] = {
+ SystemZ::R0H, SystemZ::R1H, SystemZ::R2H, SystemZ::R3H,
+ SystemZ::R4H, SystemZ::R5H, SystemZ::R6H, SystemZ::R7H,
+ SystemZ::R8H, SystemZ::R9H, SystemZ::R10H, SystemZ::R11H,
+ SystemZ::R12H, SystemZ::R13H, SystemZ::R14H, SystemZ::R15H
};
const unsigned SystemZMC::GR64Regs[16] = {
@@ -69,6 +76,24 @@ const unsigned SystemZMC::FP128Regs[16] = {
SystemZ::F12Q, SystemZ::F13Q, 0, 0
};
+unsigned SystemZMC::getFirstReg(unsigned Reg) {
+ static unsigned Map[SystemZ::NUM_TARGET_REGS];
+ static bool Initialized = false;
+ if (!Initialized) {
+ for (unsigned I = 0; I < 16; ++I) {
+ Map[GR32Regs[I]] = I;
+ Map[GRH32Regs[I]] = I;
+ Map[GR64Regs[I]] = I;
+ Map[GR128Regs[I]] = I;
+ Map[FP32Regs[I]] = I;
+ Map[FP64Regs[I]] = I;
+ Map[FP128Regs[I]] = I;
+    }
+    Initialized = true;
+  }
+ assert(Reg < SystemZ::NUM_TARGET_REGS);
+ return Map[Reg];
+}
+
static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
StringRef TT) {
MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
@@ -162,7 +187,7 @@ static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT,
MCCodeEmitter *Emitter,
bool RelaxAll,
bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, 0, MAB, OS, Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeSystemZTargetMC() {
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 3c9f0cb..97e325b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -42,11 +42,31 @@ namespace SystemZMC {
// as %r0-%r15. It seems better to provide the same interface for
// all classes though.
extern const unsigned GR32Regs[16];
+ extern const unsigned GRH32Regs[16];
extern const unsigned GR64Regs[16];
extern const unsigned GR128Regs[16];
extern const unsigned FP32Regs[16];
extern const unsigned FP64Regs[16];
extern const unsigned FP128Regs[16];
+
+ // Return the 0-based number of the first architectural register that
+ // contains the given LLVM register. E.g. R1D -> 1.
+ unsigned getFirstReg(unsigned Reg);
+
+ // Return the given register as a GR64.
+ inline unsigned getRegAsGR64(unsigned Reg) {
+ return GR64Regs[getFirstReg(Reg)];
+ }
+
+ // Return the given register as a low GR32.
+ inline unsigned getRegAsGR32(unsigned Reg) {
+ return GR32Regs[getFirstReg(Reg)];
+ }
+
+ // Return the given register as a high GR32.
+ inline unsigned getRegAsGRH32(unsigned Reg) {
+ return GRH32Regs[getFirstReg(Reg)];
+ }
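Taken together (illustrative only, not from the patch, and assuming the register names defined in SystemZRegisterInfo.td), the mapping lets any view of a GPR be converted to a sibling view:

    unsigned A = SystemZMC::getRegAsGR64(SystemZ::R2L);   // SystemZ::R2D
    unsigned B = SystemZMC::getRegAsGRH32(SystemZ::R2L);  // SystemZ::R2H
    unsigned C = SystemZMC::getRegAsGR32(SystemZ::R3H);   // SystemZ::R3L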
}
MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
@@ -54,8 +74,9 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
} // end namespace llvm
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index 2782b63..afa6cf0 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -35,19 +35,11 @@ performance measurements.
--
-We don't support tail calls at present.
-
---
-
-We don't support prefetching yet.
-
---
-
There is no scheduling support.
--
-We don't use the BRANCH ON COUNT or BRANCH ON INDEX families of instruction.
+We don't use the BRANCH ON INDEX instructions.
--
@@ -56,18 +48,7 @@ and conditional returns.
--
-We don't use the condition code results of anything except comparisons.
-
-Implementing this may need something more finely grained than the z_cmp
-and z_ucmp that we have now. It might (or might not) also be useful to
-have a mask of "don't care" values in conditional branches. For example,
-integer comparisons never set CC to 3, so the bottom bit of the CC mask
-isn't particularly relevant. JNLH and JE are equally good for testing
-equality after an integer comparison, etc.
-
---
-
-We don't use the LOAD AND TEST or TEST DATA CLASS instructions.
+We don't use the TEST DATA CLASS instructions.
--
@@ -77,20 +58,16 @@ condition codes. For example, we could use LCDFR instead of LCDBR.
--
-We don't optimize block memory operations.
+We only use MVC, XC and CLC for constant-length block operations.
+We could extend them to variable-length operations too,
+using EXECUTE RELATIVE LONG.
-It's definitely worth using things like MVC, CLC, NC, XC and OC with
-constant lengths. MVCIN may be worthwhile too.
-
-We should probably implement things like memcpy using MVC with EXECUTE.
-Likewise memcmp and CLC. MVCLE and CLCLE could be useful too.
+MVCIN, MVCLE and CLCLE may be worthwhile too.
--
-We don't optimize string operations.
-
-MVST, CLST, SRST and CUSE could be useful here. Some of the TRANSLATE
-family might be too, although they are probably more difficult to exploit.
+We don't use CUSE or the TRANSLATE family of instructions for string
+operations. The TRANSLATE ones are probably more difficult to exploit.
--
@@ -113,14 +90,7 @@ We don't use the halfword forms of LOAD REVERSED and STORE REVERSED
--
-We could take advantage of the various ... UNDER MASK instructions,
-such as ICM and STCM.
-
---
-
-DAGCombiner can detect integer absolute, but there's not yet an associated
-ISD opcode. We could add one and implement it using LOAD POSITIVE.
-Negated absolutes could use LOAD NEGATIVE.
+We don't use ICM or STCM.
--
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index eccc2aa..dcebbad 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -52,6 +52,29 @@ namespace llvm {
const unsigned CCMASK_CS_NE = CCMASK_1;
const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1;
+ // Condition-code mask assignments for a completed SRST loop.
+ const unsigned CCMASK_SRST_FOUND = CCMASK_1;
+ const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
+ const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;
+
+ // Condition-code mask assignments for TEST UNDER MASK.
+ const unsigned CCMASK_TM_ALL_0 = CCMASK_0;
+ const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1;
+ const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2;
+ const unsigned CCMASK_TM_ALL_1 = CCMASK_3;
+ const unsigned CCMASK_TM_SOME_0 = CCMASK_TM_ALL_1 ^ CCMASK_ANY;
+ const unsigned CCMASK_TM_SOME_1 = CCMASK_TM_ALL_0 ^ CCMASK_ANY;
+ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1;
+ const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
+ const unsigned CCMASK_TM = CCMASK_ANY;
+
+ // The position of the low CC bit in an IPM result.
+ const unsigned IPM_CC = 28;
+
+ // Mask assignments for PFD.
+ const unsigned PFD_READ = 1;
+ const unsigned PFD_WRITE = 2;
+
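One way to see how IPM_CC is meant to be used (a sketch, not part of the patch, assuming these constants live in the SystemZ namespace as elsewhere in the backend): after an IPM instruction deposits the condition code, the 2-bit CC value can be recovered from the 32-bit result like this:

    static inline unsigned ccFromIPM(unsigned IPMValue) {
      return (IPMValue >> SystemZ::IPM_CC) & 3;  // 0..3, the CC tested by the masks above
    }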
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;
@@ -86,6 +109,7 @@ namespace llvm {
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
+ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
} // end namespace llvm;
#endif
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 3a57ea0..75cbda4 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -19,16 +19,142 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"
using namespace llvm;
+// Return an RI instruction like MI with opcode Opcode, but with the
+// GR64 register operands turned into GR32s.
+static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) {
+ if (MI->isCompare())
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(1).getImm());
+ else
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(1).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+}
+
+// Return an RI instruction like MI with opcode Opcode, but with the
+// GR64 register operands turned into GRH32s.
+static MCInst lowerRIHigh(const MachineInstr *MI, unsigned Opcode) {
+ if (MI->isCompare())
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(1).getImm());
+ else
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(1).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+}
+
+// Return an RIE-f instruction like MI with opcode Opcode, but with the
+// R2 register turned into a GR64.
+static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()))
+ .addImm(MI->getOperand(3).getImm())
+ .addImm(MI->getOperand(4).getImm())
+ .addImm(MI->getOperand(5).getImm());
+}
+
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+ SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
- Lower.lower(MI, LoweredMI);
+ switch (MI->getOpcode()) {
+ case SystemZ::Return:
+ LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D);
+ break;
+
+ case SystemZ::CallBRASL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBASR:
+ LoweredMI = MCInstBuilder(SystemZ::BASR)
+ .addReg(SystemZ::R14D)
+ .addReg(MI->getOperand(0).getReg());
+ break;
+
+ case SystemZ::CallJG:
+ LoweredMI = MCInstBuilder(SystemZ::JG)
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBR:
+ LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
+ break;
+
+ case SystemZ::IILF64:
+ LoweredMI = MCInstBuilder(SystemZ::IILF)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+ break;
+
+ case SystemZ::IIHF64:
+ LoweredMI = MCInstBuilder(SystemZ::IIHF)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+ break;
+
+ case SystemZ::RISBHH:
+ case SystemZ::RISBHL:
+ LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG);
+ break;
+
+ case SystemZ::RISBLH:
+ case SystemZ::RISBLL:
+ LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
+ break;
+
+#define LOWER_LOW(NAME) \
+ case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break
+
+ LOWER_LOW(IILL);
+ LOWER_LOW(IILH);
+ LOWER_LOW(TMLL);
+ LOWER_LOW(TMLH);
+ LOWER_LOW(NILL);
+ LOWER_LOW(NILH);
+ LOWER_LOW(NILF);
+ LOWER_LOW(OILL);
+ LOWER_LOW(OILH);
+ LOWER_LOW(OILF);
+ LOWER_LOW(XILF);
+
+#undef LOWER_LOW
+
+#define LOWER_HIGH(NAME) \
+ case SystemZ::NAME##64: LoweredMI = lowerRIHigh(MI, SystemZ::NAME); break
+
+ LOWER_HIGH(IIHL);
+ LOWER_HIGH(IIHH);
+ LOWER_HIGH(TMHL);
+ LOWER_HIGH(TMHH);
+ LOWER_HIGH(NIHL);
+ LOWER_HIGH(NIHH);
+ LOWER_HIGH(NIHF);
+ LOWER_HIGH(OIHL);
+ LOWER_HIGH(OIHH);
+ LOWER_HIGH(OIHF);
+ LOWER_HIGH(XIHF);
+
+#undef LOWER_HIGH
+
+ default:
+ Lower.lower(MI, LoweredMI);
+ break;
+ }
OutStreamer.EmitInstruction(LoweredMI);
}
@@ -48,7 +174,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
static_cast<SystemZConstantPoolValue*>(MCPV);
const MCExpr *Expr =
- MCSymbolRefExpr::Create(Mang->getSymbol(ZCPV->getGlobalValue()),
+ MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
@@ -66,7 +192,7 @@ bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
return true;
OS << -int64_t(MI->getOperand(OpNo).getImm());
} else {
- SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+ SystemZMCInstLower Lower(MF->getContext(), *this);
MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
SystemZInstPrinter::printOperand(MO, OS);
}
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index c2d727f..c4f641e 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -23,7 +23,7 @@ def RetCC_SystemZ : CallingConv<[
// call-clobbered argument registers available for code that doesn't
// care about the ABI. (R6 is an argument register too, but is
// call-saved and therefore not suitable for return values.)
- CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W]>>,
+ CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L]>>,
CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
// ABI-compliant code returns float and double in F0. Make the
@@ -53,7 +53,7 @@ def CC_SystemZ : CallingConv<[
// The first 5 integer arguments are passed in R2-R6. Note that R6
// is call-saved.
- CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W, R6W]>>,
+ CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L, R6L]>>,
CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
// The first 4 float and double arguments are passed in even registers F0-F6.
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index e9c4f6d..6c70811 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -39,7 +39,7 @@ unsigned SystemZConstantPoolValue::getRelocationInfo() const {
int SystemZConstantPoolValue::
getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
if (Constants[I].isMachineConstantPoolEntry() &&
(Constants[I].getAlignment() & AlignMask) == 0) {
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index a58da90..acfb491 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -111,7 +111,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
const SystemZTargetMachine &TM,
unsigned GPR64, bool IsImplicit) {
const SystemZRegisterInfo *RI = TM.getRegisterInfo();
- unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_32bit);
+ unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
if (!IsLive || !IsImplicit) {
MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
@@ -420,8 +420,7 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
// Skip the return instruction.
- assert(MBBI->getOpcode() == SystemZ::RET &&
- "Can only insert epilogue into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
uint64_t StackSize = getAllocatedStackSize(MF);
if (ZFI->getLowSavedGPR()) {
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index d9794b1..f4a2773 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -107,7 +107,8 @@ static uint64_t allOnes(unsigned int Count) {
//
// (and (rotl Input, Rotate), Mask)
//
-// otherwise. The value has BitSize bits.
+// otherwise. The output value has BitSize bits, although Input may be
+// narrower (in which case the upper bits are don't-care).
struct RxSBGOperands {
RxSBGOperands(unsigned Op, SDValue N)
: Opcode(Op), BitSize(N.getValueType().getSizeInBits()),
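The rotate-and-mask form described in the comment above is plain bit arithmetic, so it can be checked outside the backend. The standalone sketch below (not part of the patch; the helper names are invented for illustration) verifies that a left shift is the same as a rotate left followed by an AND with ~0 shifted up by the same amount, which is the algebra the RxSBG folding relies on.

    // Standalone sketch: expressing (x << count) as (rotl(x, count) & mask),
    // mirroring the (and (rotl Input, Rotate), Mask) form used by RxSBGOperands.
    // Hypothetical helper names; not taken from the patch.
    #include <cassert>
    #include <cstdint>

    static uint64_t allOnes64(unsigned count) {
      return count == 0 ? 0 : (~uint64_t(0) >> (64 - count));
    }

    static uint64_t rotl64(uint64_t value, unsigned count) {
      count &= 63;
      return count ? (value << count) | (value >> (64 - count)) : value;
    }

    int main() {
      uint64_t x = 0x123456789abcdef0ULL;
      unsigned count = 12;
      // (x << count) keeps the top 64-count bits of x in the rotated value
      // and forces the bottom count bits to zero, i.e. mask = ~0 << count.
      uint64_t mask = allOnes64(64 - count) << count;
      assert((x << count) == (rotl64(x, count) & mask));
      return 0;
    }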
@@ -128,7 +129,7 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
const SystemZSubtarget &Subtarget;
// Used by SystemZOperands.td to create integer constants.
- inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {
return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
@@ -142,33 +143,39 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// Try to fold more of the base or index of AM into AM, where IsBase
// selects between the base and index.
- bool expandAddress(SystemZAddressingMode &AM, bool IsBase);
+ bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const;
// Try to describe N in AM, returning true on success.
- bool selectAddress(SDValue N, SystemZAddressingMode &AM);
+ bool selectAddress(SDValue N, SystemZAddressingMode &AM) const;
// Extract individual target operands from matched address AM.
void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
- SDValue &Base, SDValue &Disp);
+ SDValue &Base, SDValue &Disp) const;
void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
- SDValue &Base, SDValue &Disp, SDValue &Index);
+ SDValue &Base, SDValue &Disp, SDValue &Index) const;
// Try to match Addr as a FormBD address with displacement type DR.
// Return true on success, storing the base and displacement in
// Base and Disp respectively.
bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
- SDValue &Base, SDValue &Disp);
+ SDValue &Base, SDValue &Disp) const;
+
+ // Try to match Addr as a FormBDX address with displacement type DR.
+ // Return true on success, but only if the match uses no index register.
+ // Store the base and displacement in Base and Disp respectively.
+ bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
+ SDValue &Base, SDValue &Disp) const;
// Try to match Addr as a FormBDX* address of form Form with
// displacement type DR. Return true on success, storing the base,
// displacement and index in Base, Disp and Index respectively.
bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,
SystemZAddressingMode::DispRange DR, SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
+ SDValue &Base, SDValue &Disp, SDValue &Index) const;
// PC-relative address matching routines used by SystemZOperands.td.
- bool selectPCRelAddress(SDValue Addr, SDValue &Target) {
- if (Addr.getOpcode() == SystemZISD::PCREL_WRAPPER) {
+ bool selectPCRelAddress(SDValue Addr, SDValue &Target) const {
+ if (SystemZISD::isPCREL(Addr.getOpcode())) {
Target = Addr.getOperand(0);
return true;
}
@@ -176,64 +183,72 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
}
// BD matching routines used by SystemZOperands.td.
- bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);
}
- bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
}
- bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);
}
- bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
}
+ // MVI matching routines used by SystemZOperands.td.
+ bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
+ }
+ bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
+ }
+
// BDX matching routines used by SystemZOperands.td.
bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp12Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp12Pair,
Addr, Base, Disp, Index);
}
bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,
SystemZAddressingMode::Disp12Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Only128,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Pair,
Addr, Base, Disp, Index);
}
bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
SystemZAddressingMode::Disp12Pair,
Addr, Base, Disp, Index);
}
bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
SystemZAddressingMode::Disp20Pair,
Addr, Base, Disp, Index);
@@ -242,21 +257,21 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// Check whether (or Op (and X InsertMask)) is effectively an insertion
// of X into bits InsertMask of some Y != Op. Return true if so and
// set Op to that Y.
- bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask);
+ bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const;
// Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
// Return true on success.
- bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask);
+ bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const;
// Try to fold some of RxSBG.Input into other fields of RxSBG.
// Return true on success.
- bool expandRxSBG(RxSBGOperands &RxSBG);
+ bool expandRxSBG(RxSBGOperands &RxSBG) const;
- // Return an undefined i64 value.
- SDValue getUNDEF64(SDLoc DL);
+ // Return an undefined value of type VT.
+ SDValue getUNDEF(SDLoc DL, EVT VT) const;
// Convert N to VT, if it isn't already.
- SDValue convertTo(SDLoc DL, EVT VT, SDValue N);
+ SDValue convertTo(SDLoc DL, EVT VT, SDValue N) const;
// Try to implement AND or shift node N using RISBG with the zero flag set.
// Return the selected node on success, otherwise return null.
@@ -276,8 +291,26 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ // Return true if Load and Store are loads and stores of the same size
+ // and are guaranteed not to overlap. Such operations can be implemented
+ // using block (SS-format) instructions.
+ //
+ // Partial overlap would lead to incorrect code, since the block operations
+ // are logically bytewise, even though they have a fast path for the
+ // non-overlapping case. We also need to avoid full overlap (i.e. two
+ // addresses that might be equal at run time) because although that case
+ // would be handled correctly, it might be implemented by millicode.
+ bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;
+
+ // N is a (store (load Y), X) pattern. Return true if it can use an MVC
+ // from Y to X.
bool storeLoadCanUseMVC(SDNode *N) const;
+ // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true
+ // if A[1 - I] == X and if N can use a block operation like NC from A[I]
+ // to X.
+ bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;
+
public:
SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel),
@@ -363,9 +396,9 @@ static bool expandIndex(SystemZAddressingMode &AM, SDValue Base,
// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects
// between the base and index. Try to fold Op1 into AM's displacement.
static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
- SDValue Op0, ConstantSDNode *Op1) {
+ SDValue Op0, uint64_t Op1) {
// First try adjusting the displacement.
- int64_t TestDisp = AM.Disp + Op1->getSExtValue();
+ int64_t TestDisp = AM.Disp + Op1;
if (selectDisp(AM.DR, TestDisp)) {
changeComponent(AM, IsBase, Op0);
AM.Disp = TestDisp;
@@ -378,7 +411,7 @@ static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
}
bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
- bool IsBase) {
+ bool IsBase) const {
SDValue N = IsBase ? AM.Base : AM.Index;
unsigned Opcode = N.getOpcode();
if (Opcode == ISD::TRUNCATE) {
@@ -398,13 +431,23 @@ bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
return expandAdjDynAlloc(AM, IsBase, Op0);
if (Op0Code == ISD::Constant)
- return expandDisp(AM, IsBase, Op1, cast<ConstantSDNode>(Op0));
+ return expandDisp(AM, IsBase, Op1,
+ cast<ConstantSDNode>(Op0)->getSExtValue());
if (Op1Code == ISD::Constant)
- return expandDisp(AM, IsBase, Op0, cast<ConstantSDNode>(Op1));
+ return expandDisp(AM, IsBase, Op0,
+ cast<ConstantSDNode>(Op1)->getSExtValue());
if (IsBase && expandIndex(AM, Op0, Op1))
return true;
}
+ if (Opcode == SystemZISD::PCREL_OFFSET) {
+ SDValue Full = N.getOperand(0);
+ SDValue Base = N.getOperand(1);
+ SDValue Anchor = Base.getOperand(0);
+ uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() -
+ cast<GlobalAddressSDNode>(Anchor)->getOffset());
+ return expandDisp(AM, IsBase, Base, Offset);
+ }
return false;
}
@@ -483,14 +526,15 @@ static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) {
// Return true if Addr is suitable for AM, updating AM if so.
bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
- SystemZAddressingMode &AM) {
+ SystemZAddressingMode &AM) const {
// Start out assuming that the address will need to be loaded separately,
// then try to extend it as much as we can.
AM.Base = Addr;
// First try treating the address as a constant.
if (Addr.getOpcode() == ISD::Constant &&
- expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr)))
+ expandDisp(AM, true, SDValue(),
+ cast<ConstantSDNode>(Addr)->getSExtValue()))
;
else
// Otherwise try expanding each component.
@@ -530,7 +574,7 @@ static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
EVT VT, SDValue &Base,
- SDValue &Disp) {
+ SDValue &Disp) const {
Base = AM.Base;
if (!Base.getNode())
// Register 0 means "no base". This is mostly useful for shifts.
@@ -555,7 +599,8 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
EVT VT, SDValue &Base,
- SDValue &Disp, SDValue &Index) {
+ SDValue &Disp,
+ SDValue &Index) const {
getAddressOperands(AM, VT, Base, Disp);
Index = AM.Index;
@@ -566,7 +611,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
SDValue Addr, SDValue &Base,
- SDValue &Disp) {
+ SDValue &Disp) const {
SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR);
if (!selectAddress(Addr, AM))
return false;
@@ -575,10 +620,21 @@ bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
return true;
}
+bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR,
+ SDValue Addr, SDValue &Base,
+ SDValue &Disp) const {
+ SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR);
+ if (!selectAddress(Addr, AM) || AM.Index.getNode())
+ return false;
+
+ getAddressOperands(AM, Addr.getValueType(), Base, Disp);
+ return true;
+}
+
bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
SystemZAddressingMode::DispRange DR,
SDValue Addr, SDValue &Base,
- SDValue &Disp, SDValue &Index) {
+ SDValue &Disp, SDValue &Index) const {
SystemZAddressingMode AM(Form, DR);
if (!selectAddress(Addr, AM))
return false;
@@ -588,7 +644,7 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
}
bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
- uint64_t InsertMask) {
+ uint64_t InsertMask) const {
// We're only interested in cases where the insertion is into some operand
// of Op, rather than into Op itself. The only useful case is an AND.
if (Op.getOpcode() != ISD::AND)
@@ -619,7 +675,8 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
return true;
}
-bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) {
+bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG,
+ uint64_t Mask) const {
const SystemZInstrInfo *TII = getInstrInfo();
if (RxSBG.Rotate != 0)
Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));
@@ -631,26 +688,15 @@ bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) {
return false;
}
-// RxSBG.Input is a shift of Count bits in the direction given by IsLeft.
-// Return true if the result depends on the signs or zeros that are
-// shifted in.
-static bool shiftedInBitsMatter(RxSBGOperands &RxSBG, uint64_t Count,
- bool IsLeft) {
- // Work out which bits of the shift result are zeros or sign copies.
- uint64_t ShiftedIn = allOnes(Count);
- if (!IsLeft)
- ShiftedIn <<= RxSBG.BitSize - Count;
-
- // Rotate that mask in the same way as RxSBG.Input is rotated.
+// Return true if any bits of (RxSBG.Input & Mask) are significant.
+static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) {
+ // Rotate the mask in the same way as RxSBG.Input is rotated.
if (RxSBG.Rotate != 0)
- ShiftedIn = ((ShiftedIn << RxSBG.Rotate) |
- (ShiftedIn >> (64 - RxSBG.Rotate)));
-
- // Fail if any of the zero or sign bits are used.
- return (ShiftedIn & RxSBG.Mask) != 0;
+ Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)));
+ return (Mask & RxSBG.Mask) != 0;
}
-bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
+bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
SDValue N = RxSBG.Input;
unsigned Opcode = N.getOpcode();
switch (Opcode) {
@@ -706,7 +752,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
case ISD::ROTL: {
// Any 64-bit rotate left can be merged into the RxSBG.
- if (RxSBG.BitSize != 64)
+ if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64)
return false;
ConstantSDNode *CountNode
= dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
@@ -718,6 +764,19 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
return true;
}
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ // Check that the extension bits are don't-care (i.e. are masked out
+ // by the final mask).
+ unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits();
+ if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize)))
+ return false;
+
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+
case ISD::SHL: {
ConstantSDNode *CountNode =
dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
@@ -725,17 +784,18 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
return false;
uint64_t Count = CountNode->getZExtValue();
- if (Count < 1 || Count >= RxSBG.BitSize)
+ unsigned BitSize = N.getValueType().getSizeInBits();
+ if (Count < 1 || Count >= BitSize)
return false;
if (RxSBG.Opcode == SystemZ::RNSBG) {
// Treat (shl X, count) as (rotl X, size-count) as long as the bottom
// count bits from RxSBG.Input are ignored.
- if (shiftedInBitsMatter(RxSBG, Count, true))
+ if (maskMatters(RxSBG, allOnes(Count)))
return false;
} else {
// Treat (shl X, count) as (and (rotl X, count), ~0<<count).
- if (!refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count) << Count))
+ if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count))
return false;
}
@@ -752,18 +812,19 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
return false;
uint64_t Count = CountNode->getZExtValue();
- if (Count < 1 || Count >= RxSBG.BitSize)
+ unsigned BitSize = N.getValueType().getSizeInBits();
+ if (Count < 1 || Count >= BitSize)
return false;
if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {
// Treat (srl|sra X, count) as (rotl X, size-count) as long as the top
// count bits from RxSBG.Input are ignored.
- if (shiftedInBitsMatter(RxSBG, Count, false))
+ if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count)))
return false;
} else {
// Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count),
// which is similar to SLL above.
- if (!refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count)))
+ if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count)))
return false;
}
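The maskMatters test and the SHL/SRL/SRA cases above boil down to a small amount of mask arithmetic: build a mask of the bit positions that are shifted out, rotate it the same way as the input, and check whether the final mask still selects any of them. A rough standalone reimplementation, with hypothetical helper names, is sketched below for illustration only.

    // Sketch of the "which shifted-out bits matter" test used above: a mask of
    // the shifted-out bit positions is rotated the same way as RxSBG.Input and
    // then intersected with the mask of bits that are still significant.
    #include <cassert>
    #include <cstdint>

    static uint64_t allOnes64(unsigned count) {
      return count == 0 ? 0 : (~uint64_t(0) >> (64 - count));
    }

    static bool maskMattersSketch(uint64_t significantMask, unsigned rotate,
                                  uint64_t mask) {
      if (rotate != 0)
        mask = (mask << rotate) | (mask >> (64 - rotate));
      return (mask & significantMask) != 0;
    }

    int main() {
      unsigned bitSize = 64, count = 8, rotate = 0;
      // For (shl X, 8) the bottom 8 bits of the input are shifted out,
      // so they only matter if the final mask still selects them.
      uint64_t shlDontCare = allOnes64(count);                      // 0x00000000000000ff
      // For (srl X, 8) it is the top 8 bits that are shifted out.
      uint64_t srlDontCare = allOnes64(count) << (bitSize - count); // 0xff00000000000000
      uint64_t finalMask = 0x0000ffffffff0000ULL; // only bits 16..47 are used
      assert(!maskMattersSketch(finalMask, rotate, shlDontCare));
      assert(!maskMattersSketch(finalMask, rotate, srlDontCare));
      return 0;
    }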
@@ -776,24 +837,17 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
}
}
-SDValue SystemZDAGToDAGISel::getUNDEF64(SDLoc DL) {
- SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64);
+SDValue SystemZDAGToDAGISel::getUNDEF(SDLoc DL, EVT VT) const {
+ SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
return SDValue(N, 0);
}
-SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) {
- if (N.getValueType() == MVT::i32 && VT == MVT::i64) {
- SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDNode *Insert = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG,
- DL, VT, getUNDEF64(DL), N, Index);
- return SDValue(Insert, 0);
- }
- if (N.getValueType() == MVT::i64 && VT == MVT::i32) {
- SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDNode *Extract = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, VT, N, Index);
- return SDValue(Extract, 0);
- }
+SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
+ if (N.getValueType() == MVT::i32 && VT == MVT::i64)
+ return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32,
+ DL, VT, getUNDEF(DL, MVT::i64), N);
+ if (N.getValueType() == MVT::i64 && VT == MVT::i32)
+ return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N);
assert(N.getValueType() == VT && "Unexpected value types");
return N;
}
@@ -803,7 +857,8 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
unsigned Count = 0;
while (expandRxSBG(RISBG))
- Count += 1;
+ if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND)
+ Count += 1;
if (Count == 0)
return 0;
if (Count == 1) {
@@ -831,14 +886,22 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
}
+ unsigned Opcode = SystemZ::RISBG;
+ EVT OpcodeVT = MVT::i64;
+ if (VT == MVT::i32 && Subtarget.hasHighWord()) {
+ Opcode = SystemZ::RISBMux;
+ OpcodeVT = MVT::i32;
+ RISBG.Start &= 31;
+ RISBG.End &= 31;
+ }
SDValue Ops[5] = {
- getUNDEF64(SDLoc(N)),
- convertTo(SDLoc(N), MVT::i64, RISBG.Input),
+ getUNDEF(SDLoc(N), OpcodeVT),
+ convertTo(SDLoc(N), OpcodeVT, RISBG.Input),
CurDAG->getTargetConstant(RISBG.Start, MVT::i32),
CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32),
CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32)
};
- N = CurDAG->getMachineNode(SystemZ::RISBG, SDLoc(N), MVT::i64, Ops);
+ N = CurDAG->getMachineNode(Opcode, SDLoc(N), OpcodeVT, Ops);
return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
}
@@ -852,7 +915,8 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
unsigned Count[] = { 0, 0 };
for (unsigned I = 0; I < 2; ++I)
while (expandRxSBG(RxSBG[I]))
- Count[I] += 1;
+ if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND)
+ Count[I] += 1;
// Do nothing if neither operand is suitable.
if (Count[0] == 0 && Count[1] == 0)
@@ -900,49 +964,64 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
return Or.getNode();
}
-// N is a (store (load ...), ...) pattern. Return true if it can use MVC.
-bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
- StoreSDNode *Store = cast<StoreSDNode>(N);
- LoadSDNode *Load = cast<LoadSDNode>(Store->getValue().getNode());
+bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
+ LoadSDNode *Load) const {
+ // Check that the two memory operands have the same size.
+ if (Load->getMemoryVT() != Store->getMemoryVT())
+ return false;
- // MVC is logically a bytewise copy, so can't be used for volatile accesses.
+ // Volatility stops an access from being decomposed.
if (Load->isVolatile() || Store->isVolatile())
return false;
- // Prefer not to use MVC if either address can use ... RELATIVE LONG
- // instructions.
- assert(Load->getMemoryVT() == Store->getMemoryVT() &&
- "Should already have checked that the types match");
- uint64_t Size = Load->getMemoryVT().getStoreSize();
- if (Size > 1 && Size <= 8) {
- // Prefer LHRL, LRL and LGRL.
- if (Load->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER)
- return false;
- // Prefer STHRL, STRL and STGRL.
- if (Store->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER)
- return false;
- }
-
// There's no chance of overlap if the load is invariant.
if (Load->isInvariant())
return true;
- // If both operands are aligned, they must be equal or not overlap.
- if (Load->getAlignment() >= Size && Store->getAlignment() >= Size)
- return true;
-
// Otherwise we need to check whether there's an alias.
const Value *V1 = Load->getSrcValue();
const Value *V2 = Store->getSrcValue();
if (!V1 || !V2)
return false;
+ // Reject equality.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
int64_t End1 = Load->getSrcValueOffset() + Size;
int64_t End2 = Store->getSrcValueOffset() + Size;
+ if (V1 == V2 && End1 == End2)
+ return false;
+
return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()),
AliasAnalysis::Location(V2, End2, Store->getTBAAInfo()));
}
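The property canUseBlockOperation is conservatively approximating is interval overlap between the source and destination of the block operation: partial overlap gives wrong results and exact overlap is avoided because it may be handled by millicode. The toy model below works on plain addresses, purely for illustration; the real code has to reason through AliasAnalysis and source-value offsets instead.

    // Toy model of the overlap classification behind canUseBlockOperation.
    // Block (SS-format) operations are only used when the two byte ranges
    // are provably disjoint.
    #include <cassert>
    #include <cstdint>

    enum class Overlap { Disjoint, Full, Partial };

    static Overlap classify(uint64_t src, uint64_t dst, uint64_t size) {
      if (src == dst)
        return Overlap::Full;
      if (src + size <= dst || dst + size <= src)
        return Overlap::Disjoint;
      return Overlap::Partial;
    }

    int main() {
      assert(classify(0x1000, 0x2000, 8) == Overlap::Disjoint); // block op OK
      assert(classify(0x1000, 0x1000, 8) == Overlap::Full);     // rejected
      assert(classify(0x1000, 0x1004, 8) == Overlap::Partial);  // rejected
      return 0;
    }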
+bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
+ StoreSDNode *Store = cast<StoreSDNode>(N);
+ LoadSDNode *Load = cast<LoadSDNode>(Store->getValue());
+
+ // Prefer not to use MVC if either address can use ... RELATIVE LONG
+ // instructions.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
+ if (Size > 1 && Size <= 8) {
+ // Prefer LHRL, LRL and LGRL.
+ if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode()))
+ return false;
+ // Prefer STHRL, STRL and STGRL.
+ if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode()))
+ return false;
+ }
+
+ return canUseBlockOperation(Store, Load);
+}
+
+bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
+ unsigned I) const {
+ StoreSDNode *StoreA = cast<StoreSDNode>(N);
+ LoadSDNode *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
+ LoadSDNode *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
+ return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
+}
+
SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -950,6 +1029,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ Node->setNodeId(-1);
return 0;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 6acdcd4..f6e1853 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -23,8 +23,23 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include <cctype>
+
using namespace llvm;
+namespace {
+// Represents a sequence for extracting a 0/1 value from an IPM result:
+// (((X ^ XORValue) + AddValue) >> Bit)
+struct IPMConversion {
+ IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
+ : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
+
+ int64_t XORValue;
+ int64_t AddValue;
+ unsigned Bit;
+};
+}
+
// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
@@ -51,7 +66,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
MVT PtrVT = getPointerTy();
// Set up the register classes.
- addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
+ if (Subtarget.hasHighWord())
+ addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
+ else
+ addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
@@ -83,8 +101,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
++I) {
MVT VT = MVT::SimpleValueType(I);
if (isTypeLegal(VT)) {
- // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND).
- setOperationAction(ISD::SETCC, VT, Expand);
+ // Lower SET_CC into an IPM-based sequence.
+ setOperationAction(ISD::SETCC, VT, Custom);
// Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
setOperationAction(ISD::SELECT, VT, Expand);
@@ -128,9 +146,11 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
- // Use *MUL_LOHI where possible and a wider multiplication otherwise.
+ // Use *MUL_LOHI where possible instead of MULH*.
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Custom);
+ setOperationAction(ISD::UMUL_LOHI, VT, Custom);
// We have instructions for signed but not unsigned FP conversion.
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
@@ -165,14 +185,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
- // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR),
- // but they aren't really worth using. There is no 64-bit SMUL_LOHI,
- // but there is a 64-bit UMUL_LOHI: MLGR.
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
-
// FIXME: Can we support these natively?
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
@@ -200,6 +212,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
+ // Handle prefetches with PFD or PFDRL.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@@ -209,6 +224,15 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
// We can use FI for FRINT.
setOperationAction(ISD::FRINT, VT, Legal);
+ // We can use the extended form of FI for other rounding operations.
+ if (Subtarget.hasFPExtension()) {
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUND, VT, Legal);
+ }
+
// No special instructions for these.
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
@@ -255,8 +279,13 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
MaxStoresPerMemsetOptSize = 0;
}
-bool
-SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
@@ -305,6 +334,22 @@ bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
return AM.Scale == 0 || AM.Scale == 1;
}
+bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
+ if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
+ return false;
+ unsigned FromBits = FromType->getPrimitiveSizeInBits();
+ unsigned ToBits = ToType->getPrimitiveSizeInBits();
+ return FromBits > ToBits;
+}
+
+bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
+ if (!FromVT.isInteger() || !ToVT.isInteger())
+ return false;
+ unsigned FromBits = FromVT.getSizeInBits();
+ unsigned ToBits = ToVT.getSizeInBits();
+ return FromBits > ToBits;
+}
+
//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//
@@ -316,6 +361,7 @@ SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
case 'f': // Floating-point register
+ case 'h': // High-part register
case 'r': // General-purpose register
return C_RegisterClass;
@@ -358,6 +404,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
+ case 'h': // High-part register
case 'r': // General-purpose register
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_Register;
@@ -438,6 +485,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
+ case 'h': // High-part register (an LLVM extension)
+ return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
+
case 'f': // Floating-point register
if (VT == MVT::f64)
return std::make_pair(0U, &SystemZ::FP64BitRegClass);
@@ -527,6 +577,17 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
#include "SystemZGenCallingConv.inc"
+bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
+ Type *ToType) const {
+ return isTruncateFree(FromType, ToType);
+}
+
+bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+ return true;
+}
+
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
@@ -689,6 +750,23 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
return Chain;
}
+static bool canUseSiblingCall(CCState ArgCCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs) {
+ // Punt if there are any indirect or stack arguments, or if the call
+ // needs the call-saved argument register R6.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc())
+ return false;
+ unsigned Reg = VA.getLocReg();
+ if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
+ return false;
+ }
+ return true;
+}
+
SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -699,26 +777,29 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
+ bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy();
- // SystemZ target does not yet support tail call optimization.
- isTailCall = false;
-
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+ // We don't support GuaranteedTailCallOpt, only automatically-detected
+ // sibling calls.
+ if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
+ IsTailCall = false;
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
// Mark the start of the call.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
- DL);
+ if (!IsTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
+ DL);
// Copy argument values to their designated locations.
SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
@@ -767,22 +848,27 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Build a sequence of copy-to-reg nodes, chained and glued together.
- SDValue Glue;
- for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
- RegsToPass[I].second, Glue);
- Glue = Chain.getValue(1);
- }
-
// Accept direct calls by converting symbolic call addresses to the
- // associated Target* opcodes.
+ // associated Target* opcodes. Force %r1 to be used for indirect
+ // tail calls.
+ SDValue Glue;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ } else if (IsTailCall) {
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
+ Glue = Chain.getValue(1);
+ Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
+ }
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
+ RegsToPass[I].second, Glue);
+ Glue = Chain.getValue(1);
}
// The first call operand is the chain and the second is the target address.
@@ -802,6 +888,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (IsTailCall)
+ return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size());
Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
Glue = Chain.getValue(1);
@@ -910,6 +998,73 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#undef CONV
}
+// Return a sequence for getting a 1 from an IPM result when CC has a
+// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
+// The handling of CC values outside CCValid doesn't matter.
+static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
+ // Deal with cases where the result can be taken directly from a bit
+ // of the IPM result.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, 0, SystemZ::IPM_CC);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
+
+ // Deal with cases where we can add a value to force the sign bit
+ // to contain the right value. Putting the bit in 31 means we can
+ // use SRL rather than RISBG(L), and also makes it easier to get a
+ // 0/-1 value, so it has priority over the other tests below.
+ //
+ // These sequences rely on the fact that the upper two bits of the
+ // IPM result are zero.
+ uint64_t TopBit = uint64_t(1) << 31;
+ if (CCMask == (CCValid & SystemZ::CCMASK_0))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
+ return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2)))
+ return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_3))
+ return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ // Next try inverting the value and testing a bit. 0/1 could be
+ // handled this way too, but we dealt with that case above.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
+ return IPMConversion(-1, 0, SystemZ::IPM_CC);
+
+ // Handle cases where adding a value forces a non-sign bit to contain
+ // the right value.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
+ return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
+
+ // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All of these can be
+ // done by inverting the low CC bit and applying one of the
+ // sign-based extractions above.
+ if (CCMask == (CCValid & SystemZ::CCMASK_1))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_2))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ llvm_unreachable("Unexpected CC combination");
+}
+
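One way to see how the (((X ^ XORValue) + AddValue) >> Bit) recipe works is to trace the CC == 0 case returned above, IPMConversion(0, -(1 << SystemZ::IPM_CC), 31). The sketch below assumes IPM_CC is 28 and that the upper two bits of the IPM result are zero, as the surrounding comments state; it is illustrative only.

    // Worked example of the IPM conversion for the "CC == 0" test above.
    // Only CC == 0 makes the 32-bit addition wrap below zero, so the
    // logical shift by 31 extracts exactly the desired 0/1 result.
    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned IPM_CC = 28;         // assumed bit position of CC
      for (uint32_t cc = 0; cc < 4; ++cc) {
        uint32_t ipm = cc << IPM_CC;      // program-mask bits ignored here
        uint32_t x = ipm ^ 0;             // XORValue == 0
        uint32_t sum = x + uint32_t(-(1 << IPM_CC)); // AddValue == -(1 << 28)
        uint32_t result = sum >> 31;      // Bit == 31 (logical shift)
        assert(result == (cc == 0 ? 1u : 0u));
      }
      return 0;
    }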
// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
// can be converted to a comparison against zero, adjust the operands
// as necessary.
@@ -1009,74 +1164,309 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
CmpOp1 = DAG.getConstant(Value, MVT::i32);
}
-// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1
-// is an equality comparison that is better implemented using unsigned
-// rather than signed comparison instructions.
-static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0,
- SDValue CmpOp1, unsigned CCMask) {
- // The test must be for equality or inequality.
- if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE)
+// Return true if Op is either an unextended load, or a load suitable
+// for integer register-memory comparisons of type ICmpType.
+static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op.getNode());
+ if (Load) {
+ // There are no instructions to compare a register with a memory byte.
+ if (Load->getMemoryVT() == MVT::i8)
+ return false;
+ // Otherwise decide on extension type.
+ switch (Load->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ return true;
+ case ISD::SEXTLOAD:
+ return ICmpType != SystemZICMP::UnsignedOnly;
+ case ISD::ZEXTLOAD:
+ return ICmpType != SystemZICMP::SignedOnly;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+// Return true if it is better to swap comparison operands Op0 and Op1.
+// ICmpType is the type of an integer comparison.
+static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1,
+ unsigned ICmpType) {
+ // Leave f128 comparisons alone, since they have no memory forms.
+ if (Op0.getValueType() == MVT::f128)
return false;
- if (CmpOp1.getOpcode() == ISD::Constant) {
- uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue();
+ // Always keep a floating-point constant second, since comparisons with
+ // zero can use LOAD TEST and comparisons with other constants make a
+ // natural memory operand.
+ if (isa<ConstantFPSDNode>(Op1))
+ return false;
- // If we're comparing with memory, prefer unsigned comparisons for
- // values that are in the unsigned 16-bit range but not the signed
- // 16-bit range. We want to use CLFHSI and CLGHSI.
- if (CmpOp0.hasOneUse() &&
- ISD::isNormalLoad(CmpOp0.getNode()) &&
- (Value >= 32768 && Value < 65536))
- return true;
+ // Never swap comparisons with zero since there are many ways to optimize
+ // those later.
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp1 && COp1->getZExtValue() == 0)
+ return false;
- // Use unsigned comparisons for values that are in the CLGFI range
- // but not in the CGFI range.
- if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1)
+ // Look for cases where Op0 is a single-use load and Op1 isn't.
+ // In that case we generally prefer the memory to be second.
+ if ((isNaturalMemoryOperand(Op0, ICmpType) && Op0.hasOneUse()) &&
+ !(isNaturalMemoryOperand(Op1, ICmpType) && Op1.hasOneUse())) {
+ // The only exceptions are when the second operand is a constant and
+ // we can use things like CHHSI.
+ if (!COp1)
return true;
+ // The unsigned memory-immediate instructions can handle 16-bit
+ // unsigned integers.
+ if (ICmpType != SystemZICMP::SignedOnly &&
+ isUInt<16>(COp1->getZExtValue()))
+ return false;
+ // The signed memory-immediate instructions can handle 16-bit
+ // signed integers.
+ if (ICmpType != SystemZICMP::UnsignedOnly &&
+ isInt<16>(COp1->getSExtValue()))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+// Return true if shift operation N has an in-range constant shift value.
+// Store it in ShiftVal if so.
+static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!Shift)
+ return false;
+ uint64_t Amount = Shift->getZExtValue();
+ if (Amount >= N.getValueType().getSizeInBits())
return false;
+
+ ShiftVal = Amount;
+ return true;
+}
+
+// Check whether an AND with Mask is suitable for a TEST UNDER MASK
+// instruction and whether the CC value is descriptive enough to handle
+// a comparison of type Opcode between the AND result and CmpVal.
+// CCMask says which comparison result is being tested and BitSize is
+// the number of bits in the operands. If TEST UNDER MASK can be used,
+// return the corresponding CC mask, otherwise return 0.
+static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
+ uint64_t Mask, uint64_t CmpVal,
+ unsigned ICmpType) {
+ assert(Mask != 0 && "ANDs with zero should have been removed by now");
+
+ // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
+ if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
+ !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
+ return 0;
+
+ // Work out the masks for the lowest and highest bits.
+ unsigned HighShift = 63 - countLeadingZeros(Mask);
+ uint64_t High = uint64_t(1) << HighShift;
+ uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
+
+ // Signed ordered comparisons are effectively unsigned if the sign
+ // bit is dropped.
+ bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
+
+ // Check for equality comparisons with 0, or the equivalent.
+ if (CmpVal == 0) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+ if (EffectivelyUnsigned && CmpVal <= Low) {
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+ if (EffectivelyUnsigned && CmpVal < Low) {
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_SOME_1;
}
- // Prefer CL for zero-extended loads.
- if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND ||
- ISD::isZEXTLoad(CmpOp1.getNode()))
- return true;
+ // Check for equality comparisons with the mask, or the equivalent.
+ if (CmpVal == Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+ if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+ if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
- // ...and for "in-register" zero extensions.
- if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) {
- SDValue Mask = CmpOp1.getOperand(1);
- if (Mask.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff)
- return true;
+ // Check for ordered comparisons with the top bit.
+ if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_MSB_1;
+ }
+ if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_MSB_1;
}
- return false;
+ // If there are just two bits, we can do equality checks for Low and High
+ // as well.
+ if (Mask == Low + High) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
+ return SystemZ::CCMASK_TM_MIXED_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
+ return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
+ if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
+ return SystemZ::CCMASK_TM_MIXED_MSB_1;
+ if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
+ return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
+ }
+
+ // Looks like we've exhausted our options.
+ return 0;
+}
+
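The Low and High masks computed at the top of getTestUnderMaskCond are the lowest and highest set bits of the AND mask, and most of the comparisons above are rephrased in terms of them. A small standalone sketch of that computation, using hand-rolled counting helpers in place of LLVM's (and assuming a nonzero mask, as the real code asserts), is shown below for one representative mask.

    // Sketch of the Low/High bit masks used by the TEST UNDER MASK logic.
    #include <cassert>
    #include <cstdint>

    static unsigned countTrailingZeros64(uint64_t v) { // assumes v != 0
      unsigned n = 0;
      while (!(v & 1)) { v >>= 1; ++n; }
      return n;
    }

    static unsigned countLeadingZeros64(uint64_t v) {  // assumes v != 0
      unsigned n = 0;
      while (!(v & (uint64_t(1) << 63))) { v <<= 1; ++n; }
      return n;
    }

    int main() {
      uint64_t mask = 0x000000000000ff00ULL; // a contiguous mask that fits TMLL
      uint64_t low  = uint64_t(1) << countTrailingZeros64(mask);
      uint64_t high = uint64_t(1) << (63 - countLeadingZeros64(mask));
      assert(low == 0x100 && high == 0x8000);
      // With this mask, "(X & mask) == 0" maps to the "all masked bits 0"
      // CC mask, and an unsigned "(X & mask) < low" test is equivalent to it,
      // which is the kind of equivalence the function exploits.
      return 0;
    }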
+// See whether the comparison (Opcode CmpOp0, CmpOp1, ICmpType) can be
+// implemented as a TEST UNDER MASK instruction when the condition being
+// tested is as described by CCValid and CCMask. Update the arguments
+// with the TM version if so.
+static void adjustForTestUnderMask(SelectionDAG &DAG, unsigned &Opcode,
+ SDValue &CmpOp0, SDValue &CmpOp1,
+ unsigned &CCValid, unsigned &CCMask,
+ unsigned &ICmpType) {
+ // Check that we have a comparison with a constant.
+ ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1);
+ if (!ConstCmpOp1)
+ return;
+ uint64_t CmpVal = ConstCmpOp1->getZExtValue();
+
+ // Check whether the nonconstant input is an AND with a constant mask.
+ if (CmpOp0.getOpcode() != ISD::AND)
+ return;
+ SDValue AndOp0 = CmpOp0.getOperand(0);
+ SDValue AndOp1 = CmpOp0.getOperand(1);
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AndOp1.getNode());
+ if (!Mask)
+ return;
+ uint64_t MaskVal = Mask->getZExtValue();
+
+ // Check whether the combination of mask, comparison value and comparison
+ // type are suitable.
+ unsigned BitSize = CmpOp0.getValueType().getSizeInBits();
+ unsigned NewCCMask, ShiftVal;
+ if (ICmpType != SystemZICMP::SignedOnly &&
+ AndOp0.getOpcode() == ISD::SHL &&
+ isSimpleShift(AndOp0, ShiftVal) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal >> ShiftVal,
+ CmpVal >> ShiftVal,
+ SystemZICMP::Any))) {
+ AndOp0 = AndOp0.getOperand(0);
+ AndOp1 = DAG.getConstant(MaskVal >> ShiftVal, AndOp0.getValueType());
+ } else if (ICmpType != SystemZICMP::SignedOnly &&
+ AndOp0.getOpcode() == ISD::SRL &&
+ isSimpleShift(AndOp0, ShiftVal) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, CCMask,
+ MaskVal << ShiftVal,
+ CmpVal << ShiftVal,
+ SystemZICMP::UnsignedOnly))) {
+ AndOp0 = AndOp0.getOperand(0);
+ AndOp1 = DAG.getConstant(MaskVal << ShiftVal, AndOp0.getValueType());
+ } else {
+ NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal, CmpVal,
+ ICmpType);
+ if (!NewCCMask)
+ return;
+ }
+
+ // Go ahead and make the change.
+ Opcode = SystemZISD::TM;
+ CmpOp0 = AndOp0;
+ CmpOp1 = AndOp1;
+ ICmpType = (bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
+ bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
+ CCValid = SystemZ::CCMASK_TM;
+ CCMask = NewCCMask;
}
// Return a target node that compares CmpOp0 with CmpOp1 and stores a
// 2-bit result in CC. Set CCValid to the CCMASK_* of all possible
// 2-bit results and CCMask to the subset of those results that are
// associated with Cond.
-static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
+static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG,
+ SDLoc DL, SDValue CmpOp0, SDValue CmpOp1,
ISD::CondCode Cond, unsigned &CCValid,
unsigned &CCMask) {
bool IsUnsigned = false;
CCMask = CCMaskForCondCode(Cond);
- if (CmpOp0.getValueType().isFloatingPoint())
+ unsigned Opcode, ICmpType = 0;
+ if (CmpOp0.getValueType().isFloatingPoint()) {
CCValid = SystemZ::CCMASK_FCMP;
- else {
+ Opcode = SystemZISD::FCMP;
+ } else {
IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO;
CCValid = SystemZ::CCMASK_ICMP;
CCMask &= CCValid;
adjustZeroCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
- if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask))
- IsUnsigned = true;
+ Opcode = SystemZISD::ICMP;
+ // Choose the type of comparison. Equality and inequality tests can
+ // use either signed or unsigned comparisons. The choice also doesn't
+ // matter if both sign bits are known to be clear. In those cases we
+ // want to give the main isel code the freedom to choose whichever
+ // form fits best.
+ if (CCMask == SystemZ::CCMASK_CMP_EQ ||
+ CCMask == SystemZ::CCMASK_CMP_NE ||
+ (DAG.SignBitIsZero(CmpOp0) && DAG.SignBitIsZero(CmpOp1)))
+ ICmpType = SystemZICMP::Any;
+ else if (IsUnsigned)
+ ICmpType = SystemZICMP::UnsignedOnly;
+ else
+ ICmpType = SystemZICMP::SignedOnly;
}
- SDLoc DL(CmpOp0);
- return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
- DL, MVT::Glue, CmpOp0, CmpOp1);
+ if (shouldSwapCmpOperands(CmpOp0, CmpOp1, ICmpType)) {
+ std::swap(CmpOp0, CmpOp1);
+ CCMask = ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+ (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_UO));
+ }
+
+ adjustForTestUnderMask(DAG, Opcode, CmpOp0, CmpOp1, CCValid, CCMask,
+ ICmpType);
+ if (Opcode == SystemZISD::ICMP || Opcode == SystemZISD::TM)
+ return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1,
+ DAG.getConstant(ICmpType, MVT::i32));
+ return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1);
+}
+
+// Implement a 32-bit *MUL_LOHI operation by extending both operands to
+// 64 bits. Extend is the extension type to use. Store the high part
+// in Hi and the low part in Lo.
+static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
+ unsigned Extend, SDValue Op0, SDValue Op1,
+ SDValue &Hi, SDValue &Lo) {
+ Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
+ Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
+ Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}
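lowerMUL_LOHI32 above is the DAG form of an ordinary widening multiply: extend both 32-bit operands to 64 bits, multiply once, and return the two halves of the product. A scalar model of the unsigned flavour, for illustration only, is:

    // Scalar model of the 32-bit *MUL_LOHI expansion (ZERO_EXTEND flavour).
    #include <cassert>
    #include <cstdint>

    static void mulLoHi32(uint32_t a, uint32_t b, uint32_t &hi, uint32_t &lo) {
      uint64_t product = uint64_t(a) * uint64_t(b); // widen, multiply once
      hi = uint32_t(product >> 32);                 // high 32 bits
      lo = uint32_t(product);                       // low 32 bits
    }

    int main() {
      uint32_t hi, lo;
      mulLoHi32(0x80000000u, 4u, hi, lo);
      assert(hi == 2 && lo == 0); // 0x80000000 * 4 == 0x200000000
      return 0;
    }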
// Lower a binary operation that produces two VT results, one in each
@@ -1092,14 +1482,38 @@ static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
SDValue(In128, 0), Op1);
bool Is32Bit = is32Bit(VT);
- SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT);
- SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT);
- SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- VT, Result, SubReg0);
- SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- VT, Result, SubReg1);
- Even = SDValue(Reg0, 0);
- Odd = SDValue(Reg1, 0);
+ Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
+ Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
+}
+
+SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CmpOp0 = Op.getOperand(0);
+ SDValue CmpOp1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc DL(Op);
+
+ unsigned CCValid, CCMask;
+ SDValue Glue = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
+
+ IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
+ SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+
+ if (Conversion.XORValue)
+ Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.XORValue, MVT::i32));
+
+ if (Conversion.AddValue)
+ Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.AddValue, MVT::i32));
+
+ // The SHR/AND sequence should get optimized to an RISBG.
+ Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.Bit, MVT::i32));
+ if (Conversion.Bit != 31)
+ Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
+ DAG.getConstant(1, MVT::i32));
+ return Result;
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -1111,7 +1525,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
unsigned CCValid, CCMask;
- SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCValid, CCMask);
+ SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
Chain, DAG.getConstant(CCValid, MVT::i32),
DAG.getConstant(CCMask, MVT::i32), Dest, Flags);
@@ -1127,7 +1541,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
SDLoc DL(Op);
unsigned CCValid, CCMask;
- SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCValid, CCMask);
+ SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
SmallVector<SDValue, 5> Ops;
Ops.push_back(TrueOp);
@@ -1151,18 +1565,18 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
SDValue Result;
if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
- // Make sure that the offset is aligned to a halfword. If it isn't,
- // create an "anchor" at the previous 12-bit boundary.
- // FIXME check whether there is a better way of handling this.
- if (Offset & 1) {
- Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
- Offset & ~uint64_t(0xfff));
- Offset &= 0xfff;
- } else {
- Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset);
+ // Assign anchors at 1<<12 byte boundaries.
+ uint64_t Anchor = Offset & ~uint64_t(0xfff);
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+
+  // The offset can be folded into the address if it is aligned to a
+  // halfword.
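+  // For example (illustrative values): an offset of 0x1002 leaves an even
+  // remainder of 2 above the 0x1000 anchor and can be folded, whereas an
+  // offset of 0x1001 leaves an odd remainder of 1 and must stay separate.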
+ Offset -= Anchor;
+ if (Offset != 0 && (Offset & 1) == 0) {
+ SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
+ Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
Offset = 0;
}
- Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
} else {
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
@@ -1264,24 +1678,33 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
EVT InVT = In.getValueType();
EVT ResVT = Op.getValueType();
- SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDValue Shift32 = DAG.getConstant(32, MVT::i64);
if (InVT == MVT::i32 && ResVT == MVT::f32) {
- SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
- SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32);
- SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift);
- SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- MVT::f32, Out64, SubReg32);
- return SDValue(Out, 0);
+ SDValue In64;
+ if (Subtarget.hasHighWord()) {
+ SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
+ MVT::i64);
+ In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ MVT::i64, SDValue(U64, 0), In);
+ } else {
+ In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
+ In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
+ DAG.getConstant(32, MVT::i64));
+ }
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
+ DL, MVT::f32, Out64);
}
if (InVT == MVT::f32 && ResVT == MVT::i32) {
SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
- SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
- MVT::f64, SDValue(U64, 0), In, SubReg32);
- SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0));
- SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32);
- SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
- return Out;
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ MVT::f64, SDValue(U64, 0), In);
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
+ if (Subtarget.hasHighWord())
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
+ MVT::i32, Out64);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
+ DAG.getConstant(32, MVT::i64));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
}
llvm_unreachable("Unexpected bitcast combination");
}
@@ -1364,18 +1787,64 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
- assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI");
+ SDValue Ops[2];
+ if (is32Bit(VT))
+ // Just do a normal 64-bit multiplication and extract the results.
+ // We define this so that it can be used for constant division.
+ lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
+ Op.getOperand(1), Ops[1], Ops[0]);
+ else {
+ // Do a full 128-bit multiplication based on UMUL_LOHI64:
+ //
+ // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
+ //
+ // but using the fact that the upper halves are either all zeros
+ // or all ones:
+ //
+ // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
+ //
+    // and grouping the two rightmost terms together, since they are cheaper
+    // to compute than the multiplication:
+ //
+ // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
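+    //
+    // As an illustrative check in 4-bit arithmetic: -3 * 5 = -15.
+    // Unsigned, 13 * 5 = 65 = (4 << 4) + 1, so the unsigned high half is 4.
+    // lh is all ones and rh is zero, so (lh & rl) + (ll & rh) = 5, and
+    // 4 - 5 = -1 recovers the signed high half; the low half 1 is unchanged.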
+ SDValue C63 = DAG.getConstant(63, MVT::i64);
+ SDValue LL = Op.getOperand(0);
+ SDValue RL = Op.getOperand(1);
+ SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
+ SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. SMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ LL, RL, Ops[1], Ops[0]);
+ SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
+ SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
+ SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
+ Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
+ }
+ return DAG.getMergeValues(Ops, 2, DL);
+}
- // UMUL_LOHI64 returns the low result in the odd register and the high
- // result in the even register. UMUL_LOHI is defined to return the
- // low half first, so the results are in reverse order.
+SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
SDValue Ops[2];
- lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
- Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ if (is32Bit(VT))
+ // Just do a normal 64-bit multiplication and extract the results.
+ // We define this so that it can be used for constant division.
+ lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
+ Op.getOperand(1), Ops[1], Ops[0]);
+ else
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. UMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, 2, DL);
}
@@ -1464,10 +1933,10 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
// high 32 bits and just masks out low bits. We can skip it if so.
if (HighOp.getOpcode() == ISD::AND &&
HighOp.getOperand(1).getOpcode() == ISD::Constant) {
- ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1));
- uint64_t Mask = MaskNode->getZExtValue() | Masks[High];
- if ((Mask >> 32) == 0xffffffff)
- HighOp = HighOp.getOperand(0);
+ SDValue HighOp0 = HighOp.getOperand(0);
+ uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
+ if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
+ HighOp = HighOp0;
}
// Take advantage of the fact that all GR32 operations only change the
@@ -1476,10 +1945,8 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
// can be folded.
SDLoc DL(Op);
SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
- SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
- MVT::i64, HighOp, Low32, SubReg32);
- return SDValue(Result, 0);
+ return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
+ MVT::i64, HighOp, Low32);
}
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
@@ -1618,6 +2085,26 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SystemZ::R15D, Op.getOperand(1));
}
+SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (!IsData)
+ // Just preserve the chain.
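+    // (PFD prefetches data only, so instruction-prefetch hints are simply
+    // dropped.)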
+ return Op.getOperand(0);
+
+ bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
+ MemIntrinsicSDNode *Node = cast<MemIntrinsicSDNode>(Op.getNode());
+ SDValue Ops[] = {
+ Op.getOperand(0),
+ DAG.getConstant(Code, MVT::i32),
+ Op.getOperand(1)
+ };
+ return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
+ Node->getVTList(), Ops, array_lengthof(Ops),
+ Node->getMemoryVT(), Node->getMemOperand());
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1625,6 +2112,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerBR_CC(Op, DAG);
case ISD::SELECT_CC:
return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC:
+ return lowerSETCC(Op, DAG);
case ISD::GlobalAddress:
return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
case ISD::GlobalTLSAddress:
@@ -1643,6 +2132,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerVACOPY(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SMUL_LOHI:
+ return lowerSMUL_LOHI(Op, DAG);
case ISD::UMUL_LOHI:
return lowerUMUL_LOHI(Op, DAG);
case ISD::SDIVREM:
@@ -1679,6 +2170,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKSAVE(Op, DAG);
case ISD::STACKRESTORE:
return lowerSTACKRESTORE(Op, DAG);
+ case ISD::PREFETCH:
+ return lowerPREFETCH(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -1689,9 +2182,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
OPCODE(RET_FLAG);
OPCODE(CALL);
+ OPCODE(SIBCALL);
OPCODE(PCREL_WRAPPER);
- OPCODE(CMP);
- OPCODE(UCMP);
+ OPCODE(PCREL_OFFSET);
+ OPCODE(ICMP);
+ OPCODE(FCMP);
+ OPCODE(TM);
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
@@ -1701,6 +2197,19 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(UDIVREM32);
OPCODE(UDIVREM64);
OPCODE(MVC);
+ OPCODE(MVC_LOOP);
+ OPCODE(NC);
+ OPCODE(NC_LOOP);
+ OPCODE(OC);
+ OPCODE(OC_LOOP);
+ OPCODE(XC);
+ OPCODE(XC_LOOP);
+ OPCODE(CLC);
+ OPCODE(CLC_LOOP);
+ OPCODE(STRCMP);
+ OPCODE(STPCPY);
+ OPCODE(SEARCH_STRING);
+ OPCODE(IPM);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -1713,6 +2222,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
+ OPCODE(PREFETCH);
}
return NULL;
#undef OPCODE
@@ -1742,6 +2252,31 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
return NewMBB;
}
+// Split MBB before MI and return the new block (the one that contains MI).
+static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+// Force base value Base into a register before MI. Return the register.
+static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
+ const SystemZInstrInfo *TII) {
+ if (Base.isReg())
+ return Base.getReg();
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
+ .addOperand(Base).addImm(0).addReg(0);
+ return Reg;
+}
+
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
@@ -1756,7 +2291,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -1777,7 +2312,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
// %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
// ...
MBB = JoinMBB;
- BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg)
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
.addReg(TrueReg).addMBB(StartMBB)
.addReg(FalseReg).addMBB(FalseMBB);
@@ -1824,7 +2359,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
CCMask ^= CCValid;
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -1900,7 +2435,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -1934,11 +2469,11 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
.addReg(RotatedOldVal).addOperand(Src2);
if (BitSize < 32)
// XILF with the upper BitSize bits set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
.addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
else if (BitSize == 32)
// XILF with every bit set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
.addReg(Tmp).addImm(~uint32_t(0));
else {
// Use LCGR and add -1 to the result, which is more compact than
@@ -2022,7 +2557,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
@@ -2129,7 +2664,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
@@ -2205,8 +2740,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
// if the high register of the GR128 value must be cleared or false if
-// it's "don't care". SubReg is subreg_odd32 when extending a GR32
-// and subreg_odd when extending a GR64.
+// it's "don't care". SubReg is subreg_l32 when extending a GR32
+// and subreg_l64 when extending a GR64.
MachineBasicBlock *
SystemZTargetLowering::emitExt128(MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -2228,7 +2763,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
.addImm(0);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
- .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high);
+ .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
In128 = NewIn128;
}
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
@@ -2239,28 +2774,237 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
}
MachineBasicBlock *
-SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
+SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const {
const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
- MachineOperand DestBase = MI->getOperand(0);
+ MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
uint64_t DestDisp = MI->getOperand(1).getImm();
- MachineOperand SrcBase = MI->getOperand(2);
+ MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2));
uint64_t SrcDisp = MI->getOperand(3).getImm();
uint64_t Length = MI->getOperand(4).getImm();
- BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC))
- .addOperand(DestBase).addImm(DestDisp).addImm(Length)
- .addOperand(SrcBase).addImm(SrcDisp);
+ // When generating more than one CLC, all but the last will need to
+ // branch to the end when a difference is found.
+ MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
+ splitBlockAfter(MI, MBB) : 0);
+
+ // Check for the loop form, in which operand 5 is the trip count.
+ if (MI->getNumExplicitOperands() > 5) {
+ bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
+
+ uint64_t StartCountReg = MI->getOperand(5).getReg();
+ uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
+ uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
+ forceReg(MI, DestBase, TII));
+
+ const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
+ uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
+ MRI.createVirtualRegister(RC));
+ uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
+ MRI.createVirtualRegister(RC));
+
+ RC = &SystemZ::GR64BitRegClass;
+ uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
+ uint64_t NextCountReg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+
+ // StartMBB:
+  //   # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
+ // [ %NextDestReg, NextMBB ]
+ // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
+ // [ %NextSrcReg, NextMBB ]
+ // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
+ // [ %NextCountReg, NextMBB ]
+ // ( PFD 2, 768+DestDisp(%ThisDestReg) )
+ // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
+ // ( JLH EndMBB )
+ //
+ // The prefetch is used only for MVC. The JLH is used only for CLC.
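+  // The prefetch primes a destination block a few iterations ahead of MVC's
+  // stores; the branch lets CLC exit as soon as a difference is found.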
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
+ .addReg(StartDestReg).addMBB(StartMBB)
+ .addReg(NextDestReg).addMBB(NextMBB);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
+ .addReg(StartSrcReg).addMBB(StartMBB)
+ .addReg(NextSrcReg).addMBB(NextMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
+ .addReg(StartCountReg).addMBB(StartMBB)
+ .addReg(NextCountReg).addMBB(NextMBB);
+ if (Opcode == SystemZ::MVC)
+ BuildMI(MBB, DL, TII->get(SystemZ::PFD))
+ .addImm(SystemZ::PFD_WRITE)
+ .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
+ .addReg(ThisSrcReg).addImm(SrcDisp);
+ if (EndMBB) {
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ MBB->addSuccessor(NextMBB);
+ }
+
+ // NextMBB:
+ // %NextDestReg = LA 256(%ThisDestReg)
+ // %NextSrcReg = LA 256(%ThisSrcReg)
+ // %NextCountReg = AGHI %ThisCountReg, -1
+ // CGHI %NextCountReg, 0
+ // JLH LoopMBB
+  //   # fall through to DoneMBB
+ //
+ // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
+ MBB = NextMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
+ .addReg(ThisDestReg).addImm(256).addReg(0);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
+ .addReg(ThisSrcReg).addImm(256).addReg(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
+ .addReg(ThisCountReg).addImm(-1);
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(NextCountReg).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DestBase = MachineOperand::CreateReg(NextDestReg, false);
+ SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
+ Length &= 255;
+ MBB = DoneMBB;
+ }
+ // Handle any remaining bytes with straight-line code.
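+  // Each MVC/CLC-style instruction covers at most 256 bytes, so emit one
+  // instruction per remaining 256-byte chunk.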
+ while (Length > 0) {
+ uint64_t ThisLength = std::min(Length, uint64_t(256));
+ // The previous iteration might have created out-of-range displacements.
+ // Apply them using LAY if so.
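+    // (These SS-format instructions only accept 12-bit unsigned
+    // displacements, hence the isUInt<12> checks.)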
+ if (!isUInt<12>(DestDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(DestBase).addImm(DestDisp).addReg(0);
+ DestBase = MachineOperand::CreateReg(Reg, false);
+ DestDisp = 0;
+ }
+ if (!isUInt<12>(SrcDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
+ SrcBase = MachineOperand::CreateReg(Reg, false);
+ SrcDisp = 0;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(Opcode))
+ .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
+ .addOperand(SrcBase).addImm(SrcDisp);
+ DestDisp += ThisLength;
+ SrcDisp += ThisLength;
+ Length -= ThisLength;
+ // If there's another CLC to go, branch to the end if a difference
+ // was found.
+ if (EndMBB && Length > 0) {
+ MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ MBB->addSuccessor(NextMBB);
+ MBB = NextMBB;
+ }
+ }
+ if (EndMBB) {
+ MBB->addSuccessor(EndMBB);
+ MBB = EndMBB;
+ MBB->addLiveIn(SystemZ::CC);
+ }
MI->eraseFromParent();
return MBB;
}
+// Decompose string pseudo-instruction MI into a loop that continually performs
+// Opcode until CC != 3.
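+// (CC 3 indicates that the instruction stopped after processing a
+// CPU-determined number of bytes and must be reissued to continue.)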
+MachineBasicBlock *
+SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ uint64_t End1Reg = MI->getOperand(0).getReg();
+ uint64_t Start1Reg = MI->getOperand(1).getReg();
+ uint64_t Start2Reg = MI->getOperand(2).getReg();
+ uint64_t CharReg = MI->getOperand(3).getReg();
+
+ const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
+ uint64_t This1Reg = MRI.createVirtualRegister(RC);
+ uint64_t This2Reg = MRI.createVirtualRegister(RC);
+ uint64_t End2Reg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+  //   # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
+ // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
+ // R0L = %CharReg
+ // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
+ // JO LoopMBB
+  //   # fall through to DoneMBB
+ //
+ // The load of R0L can be hoisted by post-RA LICM.
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
+ .addReg(Start1Reg).addMBB(StartMBB)
+ .addReg(End1Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
+ .addReg(Start2Reg).addMBB(StartMBB)
+ .addReg(End2Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
+ .addReg(This1Reg).addReg(This2Reg);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DoneMBB->addLiveIn(SystemZ::CC);
+
+ MI->eraseFromParent();
+ return DoneMBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
+ case SystemZ::Select32Mux:
case SystemZ::Select32:
case SystemZ::SelectF32:
case SystemZ::Select64:
@@ -2268,18 +3012,14 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::SelectF128:
return emitSelect(MI, MBB);
- case SystemZ::CondStore8_32:
- return emitCondStore(MI, MBB, SystemZ::STC32, 0, false);
- case SystemZ::CondStore8_32Inv:
- return emitCondStore(MI, MBB, SystemZ::STC32, 0, true);
- case SystemZ::CondStore16_32:
- return emitCondStore(MI, MBB, SystemZ::STH32, 0, false);
- case SystemZ::CondStore16_32Inv:
- return emitCondStore(MI, MBB, SystemZ::STH32, 0, true);
- case SystemZ::CondStore32_32:
- return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, false);
- case SystemZ::CondStore32_32Inv:
- return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, true);
+ case SystemZ::CondStore8Mux:
+ return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
+ case SystemZ::CondStore8MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
+ case SystemZ::CondStore16Mux:
+ return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
+ case SystemZ::CondStore16MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
case SystemZ::CondStore8:
return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
case SystemZ::CondStore8Inv:
@@ -2306,11 +3046,11 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
case SystemZ::AEXT128_64:
- return emitExt128(MI, MBB, false, SystemZ::subreg_low);
+ return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
case SystemZ::ZEXT128_32:
- return emitExt128(MI, MBB, true, SystemZ::subreg_low32);
+ return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
case SystemZ::ZEXT128_64:
- return emitExt128(MI, MBB, true, SystemZ::subreg_low);
+ return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
case SystemZ::ATOMIC_SWAPW:
return emitAtomicLoadBinary(MI, MBB, 0, 0);
@@ -2346,98 +3086,98 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::ATOMIC_LOADW_NR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
case SystemZ::ATOMIC_LOADW_NILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
case SystemZ::ATOMIC_LOAD_NR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
- case SystemZ::ATOMIC_LOAD_NILL32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32);
- case SystemZ::ATOMIC_LOAD_NILH32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32);
- case SystemZ::ATOMIC_LOAD_NILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32);
- case SystemZ::ATOMIC_LOAD_NGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
case SystemZ::ATOMIC_LOAD_NILL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
case SystemZ::ATOMIC_LOAD_NILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64);
- case SystemZ::ATOMIC_LOAD_NIHL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64);
- case SystemZ::ATOMIC_LOAD_NIHH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
case SystemZ::ATOMIC_LOAD_NILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64);
- case SystemZ::ATOMIC_LOAD_NIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
+ case SystemZ::ATOMIC_LOAD_NGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
+ case SystemZ::ATOMIC_LOAD_NILL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
+ case SystemZ::ATOMIC_LOAD_NILH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
+ case SystemZ::ATOMIC_LOAD_NILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
case SystemZ::ATOMIC_LOADW_OR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
case SystemZ::ATOMIC_LOADW_OILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
case SystemZ::ATOMIC_LOAD_OR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
- case SystemZ::ATOMIC_LOAD_OILL32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32);
- case SystemZ::ATOMIC_LOAD_OILH32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32);
- case SystemZ::ATOMIC_LOAD_OILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32);
- case SystemZ::ATOMIC_LOAD_OGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
case SystemZ::ATOMIC_LOAD_OILL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
case SystemZ::ATOMIC_LOAD_OILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64);
- case SystemZ::ATOMIC_LOAD_OIHL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64);
- case SystemZ::ATOMIC_LOAD_OIHH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
case SystemZ::ATOMIC_LOAD_OILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64);
- case SystemZ::ATOMIC_LOAD_OIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
+ case SystemZ::ATOMIC_LOAD_OGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
+ case SystemZ::ATOMIC_LOAD_OILL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
+ case SystemZ::ATOMIC_LOAD_OILH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
+ case SystemZ::ATOMIC_LOAD_OILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
case SystemZ::ATOMIC_LOADW_XR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
case SystemZ::ATOMIC_LOADW_XILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
case SystemZ::ATOMIC_LOAD_XR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
- case SystemZ::ATOMIC_LOAD_XILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32);
+ case SystemZ::ATOMIC_LOAD_XILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
case SystemZ::ATOMIC_LOAD_XGR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
- case SystemZ::ATOMIC_LOAD_XILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64);
- case SystemZ::ATOMIC_LOAD_XIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64);
+ case SystemZ::ATOMIC_LOAD_XILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
+ case SystemZ::ATOMIC_LOAD_XIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
case SystemZ::ATOMIC_LOADW_NRi:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
case SystemZ::ATOMIC_LOADW_NILHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
case SystemZ::ATOMIC_LOAD_NRi:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
- case SystemZ::ATOMIC_LOAD_NILL32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true);
- case SystemZ::ATOMIC_LOAD_NILH32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true);
- case SystemZ::ATOMIC_LOAD_NILF32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true);
- case SystemZ::ATOMIC_LOAD_NGRi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
case SystemZ::ATOMIC_LOAD_NILLi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
case SystemZ::ATOMIC_LOAD_NILHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHLi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
case SystemZ::ATOMIC_LOAD_NILFi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHFi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
+ case SystemZ::ATOMIC_LOAD_NGRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILL64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILH64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHL64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHH64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILF64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHF64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
case SystemZ::ATOMIC_LOADW_MIN:
return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
@@ -2481,8 +3221,27 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::ATOMIC_CMP_SWAPW:
return emitAtomicCmpSwapW(MI, MBB);
- case SystemZ::MVCWrapper:
- return emitMVCWrapper(MI, MBB);
+ case SystemZ::MVCSequence:
+ case SystemZ::MVCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
+ case SystemZ::NCSequence:
+ case SystemZ::NCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::NC);
+ case SystemZ::OCSequence:
+ case SystemZ::OCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::OC);
+ case SystemZ::XCSequence:
+ case SystemZ::XCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::XC);
+ case SystemZ::CLCSequence:
+ case SystemZ::CLCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
+ case SystemZ::CLSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::CLST);
+ case SystemZ::MVSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::MVST);
+ case SystemZ::SRSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::SRST);
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index c0dbe49..c6dcca6 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -32,17 +32,32 @@ namespace SystemZISD {
// is the target address. The arguments start at operand 2.
// There is an optional glue operand at the end.
CALL,
+ SIBCALL,
// Wraps a TargetGlobalAddress that should be loaded using PC-relative
// accesses (LARL). Operand 0 is the address.
PCREL_WRAPPER,
- // Signed integer and floating-point comparisons. The operands are the
- // two values to compare.
- CMP,
+ // Used in cases where an offset is applied to a TargetGlobalAddress.
+ // Operand 0 is the full TargetGlobalAddress and operand 1 is a
+ // PCREL_WRAPPER for an anchor point. This is used so that we can
+ // cheaply refer to either the full address or the anchor point
+ // as a register base.
+ PCREL_OFFSET,
- // Likewise unsigned integer comparison.
- UCMP,
+ // Integer comparisons. There are three operands: the two values
+ // to compare, and an integer of type SystemZICMP.
+ ICMP,
+
+ // Floating-point comparisons. The two operands are the values to compare.
+ FCMP,
+
+ // Test under mask. The first operand is ANDed with the second operand
+ // and the condition codes are set on the result. The third operand is
+ // a boolean that is true if the condition codes need to distinguish
+ // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
+ // register forms do but the memory forms don't).
+ TM,
// Branches if a condition is true. Operand 0 is the chain operand;
// operand 1 is the 4-bit condition-code mask, with bit N in
@@ -73,13 +88,50 @@ namespace SystemZISD {
UDIVREM32,
UDIVREM64,
- // Use MVC to copy bytes from one memory location to another.
- // The first operand is the target address, the second operand is the
- // source address, and the third operand is the constant length.
+ // Use a series of MVCs to copy bytes from one memory location to another.
+ // The operands are:
+ // - the target address
+ // - the source address
+ // - the constant length
+ //
// This isn't a memory opcode because we'd need to attach two
// MachineMemOperands rather than one.
MVC,
+ // Like MVC, but implemented as a loop that handles X*256 bytes
+ // followed by straight-line code to handle the rest (if any).
+ // The value of X is passed as an additional operand.
+ MVC_LOOP,
+
+ // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
+ NC,
+ NC_LOOP,
+ OC,
+ OC_LOOP,
+ XC,
+ XC_LOOP,
+
+ // Use CLC to compare two blocks of memory, with the same comments
+ // as for MVC and MVC_LOOP.
+ CLC,
+ CLC_LOOP,
+
+ // Use an MVST-based sequence to implement stpcpy().
+ STPCPY,
+
+ // Use a CLST-based sequence to implement strcmp(). The two input operands
+ // are the addresses of the strings to compare.
+ STRCMP,
+
+ // Use an SRST-based sequence to search a block of memory. The first
+ // operand is the end address, the second is the start, and the third
+ // is the character to search for. CC is set to 1 on success and 2
+ // on failure.
+ SEARCH_STRING,
+
+ // Store the CC value in bits 29 and 28 of an integer.
+ IPM,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -111,7 +163,27 @@ namespace SystemZISD {
// operand into the high bits
// Operand 4: the negative of operand 2, for rotating the other way
// Operand 5: the width of the field in bits (8 or 16)
- ATOMIC_CMP_SWAPW
+ ATOMIC_CMP_SWAPW,
+
+ // Prefetch from the second operand using the 4-bit control code in
+ // the first operand. The code is 1 for a load prefetch and 2 for
+ // a store prefetch.
+ PREFETCH
+ };
+
+ // Return true if OPCODE is some kind of PC-relative address.
+ inline bool isPCREL(unsigned Opcode) {
+ return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
+ }
+}
+
+namespace SystemZICMP {
+ // Describes whether an integer comparison needs to be signed or unsigned,
+ // or whether either type is OK.
+ enum {
+ Any,
+ UnsignedOnly,
+ SignedOnly
};
}
@@ -126,15 +198,15 @@ public:
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE {
return MVT::i32;
}
- virtual EVT getSetCCResultType(LLVMContext &, EVT) const LLVM_OVERRIDE {
- return MVT::i32;
- }
+ virtual EVT getSetCCResultType(LLVMContext &, EVT) const LLVM_OVERRIDE;
virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE;
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const LLVM_OVERRIDE;
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const
LLVM_OVERRIDE;
virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const
LLVM_OVERRIDE;
+ virtual bool isTruncateFree(Type *, Type *) const LLVM_OVERRIDE;
+ virtual bool isTruncateFree(EVT, EVT) const LLVM_OVERRIDE;
virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
virtual std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const std::string &Constraint,
@@ -154,6 +226,8 @@ public:
MachineBasicBlock *BB) const LLVM_OVERRIDE;
virtual SDValue LowerOperation(SDValue Op,
SelectionDAG &DAG) const LLVM_OVERRIDE;
+ virtual bool allowTruncateForTailCall(Type *, Type *) const LLVM_OVERRIDE;
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const LLVM_OVERRIDE;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -176,6 +250,7 @@ private:
const SystemZTargetMachine &TM;
// Implement LowerOperation for individual opcodes.
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
@@ -189,6 +264,7 @@ private:
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
@@ -199,6 +275,7 @@ private:
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -230,8 +307,12 @@ private:
unsigned BitSize) const;
MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *emitMVCWrapper(MachineInstr *MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode) const;
+ MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode) const;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index b903b51..6080046 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -27,9 +27,9 @@ defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
// Load zero.
let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- def LZER : InherentRRE<"lze", 0xB374, FP32, (fpimm0)>;
- def LZDR : InherentRRE<"lzd", 0xB375, FP64, (fpimm0)>;
- def LZXR : InherentRRE<"lzx", 0xB376, FP128, (fpimm0)>;
+ def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>;
+ def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>;
+ def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>;
}
// Moves between two floating-point registers.
@@ -62,7 +62,7 @@ let isCodeGenOnly = 1 in {
// The sign of an FP128 is in the high register.
def : Pat<(fcopysign FP32:$src1, FP128:$src2),
- (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>;
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
@@ -71,24 +71,24 @@ def CPSDRdd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP64>;
// The sign of an FP128 is in the high register.
def : Pat<(fcopysign FP64:$src1, FP128:$src2),
- (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>;
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
// the low half as-is.
class CopySign128<RegisterOperand cls, dag upper>
: Pat<(fcopysign FP128:$src1, cls:$src2),
- (INSERT_SUBREG FP128:$src1, upper, subreg_high)>;
+ (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>;
-def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_high),
+def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
FP32:$src2)>;
-def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high),
+def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
FP64:$src2)>;
-def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high),
- (EXTRACT_SUBREG FP128:$src2, subreg_high))>;
+def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
-defm LoadStoreF32 : MVCLoadStore<load, store, f32, MVCWrapper, 4>;
-defm LoadStoreF64 : MVCLoadStore<load, store, f64, MVCWrapper, 8>;
-defm LoadStoreF128 : MVCLoadStore<load, store, f128, MVCWrapper, 16>;
+defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>;
+defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>;
+defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>;
//===----------------------------------------------------------------------===//
// Load instructions
@@ -134,9 +134,9 @@ def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>;
def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>;
def : Pat<(f32 (fround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fround FP128:$src)),
- (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>;
+ (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
// Extend register floating-point values to wider representations.
def LDEBR : UnaryRRE<"ldeb", 0xB304, fextend, FP64, FP32>;
@@ -212,21 +212,56 @@ def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
// Round to an integer, with the second operand (modifier M3) specifying
-// the rounding mode.
-//
-// These forms always check for inexact conditions. z196 added versions
-// that allow this to suppressed (as for fnearbyint), but we don't yet
-// support -march=z196.
+// the rounding mode. These forms always check for inexact conditions.
def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>;
def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>;
def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>;
+// Extended forms of the previous three instructions. M4 can be set to 4
+// to suppress detection of inexact conditions.
+def FIEBRA : UnaryRRF4<"fiebra", 0xB357, FP32, FP32>,
+ Requires<[FeatureFPExtension]>;
+def FIDBRA : UnaryRRF4<"fidbra", 0xB35F, FP64, FP64>,
+ Requires<[FeatureFPExtension]>;
+def FIXBRA : UnaryRRF4<"fixbra", 0xB347, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+
// frint rounds according to the current mode (modifier 0) and detects
// inexact conditions.
def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>;
def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>;
def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+let Predicates = [FeatureFPExtension] in {
+ // fnearbyint is like frint but does not detect inexact conditions.
+ def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
+ def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
+ def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+
+ // floor is no longer allowed to raise an inexact condition,
+ // so restrict it to the cases where the condition can be suppressed.
+ // Mode 7 is round towards -inf.
+ def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
+ def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
+ def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
+
+ // Same idea for ceil, where mode 6 is round towards +inf.
+ def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
+ def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
+ def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
+
+ // Same idea for trunc, where mode 5 is round towards zero.
+ def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
+ def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
+ def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
+
+ // Same idea for round, where mode 1 is round towards nearest with
+ // ties away from zero.
+ def : Pat<(frnd FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
+ def : Pat<(frnd FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
+ def : Pat<(frnd FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+}
+
//===----------------------------------------------------------------------===//
// Binary arithmetic
//===----------------------------------------------------------------------===//
@@ -265,26 +300,26 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- FP32:$src1, subreg_32bit), FP32:$src2)>;
+ FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
def MXDBR : BinaryRRE<"mxdb", 0xB307, null_frag, FP128, FP64>;
def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))),
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
- FP64:$src1, subreg_high), FP64:$src2)>;
+ FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
def : Pat<(fmul (f128 (fextend FP64:$src1)),
(f128 (extloadf64 bdxaddr12only:$addr))),
- (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high),
+ (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
bdxaddr12only:$addr)>;
// Fused multiply-add.
@@ -314,12 +349,12 @@ def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
//===----------------------------------------------------------------------===//
let Defs = [CC], CCValues = 0xF in {
- def CEBR : CompareRRE<"ceb", 0xB309, z_cmp, FP32, FP32>;
- def CDBR : CompareRRE<"cdb", 0xB319, z_cmp, FP64, FP64>;
- def CXBR : CompareRRE<"cxb", 0xB349, z_cmp, FP128, FP128>;
+ def CEBR : CompareRRE<"ceb", 0xB309, z_fcmp, FP32, FP32>;
+ def CDBR : CompareRRE<"cdb", 0xB319, z_fcmp, FP64, FP64>;
+ def CXBR : CompareRRE<"cxb", 0xB349, z_fcmp, FP128, FP128>;
- def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load, 4>;
- def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load, 8>;
+ def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>;
+ def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 954df11..a8efe16 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -308,10 +308,11 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> R2;
bits<4> R3;
+ bits<4> R4;
let Inst{31-16} = op;
let Inst{15-12} = R3;
- let Inst{11-8} = 0;
+ let Inst{11-8} = R4;
let Inst{7-4} = R1;
let Inst{3-0} = R2;
}
@@ -539,6 +540,10 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// One output operand and five input operands. The first two operands
// are registers and the other three are immediates.
//
+// Prefetch:
+// One 4-bit immediate operand and one address operand. The immediate
+// operand is 1 for a load prefetch and 2 for a store prefetch.
+//
// The format determines which input operands are tied to output operands,
// and also determines the shape of any address operand.
//
@@ -552,7 +557,7 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
dag src>
: InstRRE<opcode, (outs cls:$R1), (ins),
- mnemonic#"r\t$R1",
+ mnemonic#"\t$R1",
[(set cls:$R1, src)]> {
let R2 = 0;
}
@@ -626,27 +631,33 @@ class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
let mayStore = 1;
}
+// StoreSI* instructions are used to store an integer to memory, but the
+// addresses are more restricted than for normal stores. If we are in the
+// situation of having to force either the address into a register or the
+// constant into a register, it's usually better to do the latter.
+// We therefore match the address in the same way as a normal store and
+// only use the StoreSI* instruction if the matched address is suitable.
class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- Immediate imm, AddressingMode mode = bdaddr12only>
- : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ Immediate imm>
+ : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
- [(operator imm:$I2, mode:$BD1)]> {
+ [(operator imm:$I2, mviaddr12pair:$BD1)]> {
let mayStore = 1;
}
class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- Immediate imm, AddressingMode mode = bdaddr20only>
- : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ Immediate imm>
+ : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
- [(operator imm:$I2, mode:$BD1)]> {
+ [(operator imm:$I2, mviaddr20pair:$BD1)]> {
let mayStore = 1;
}
class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
Immediate imm>
- : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
- [(operator imm:$I2, bdaddr12only:$BD1)]> {
+ [(operator imm:$I2, mviaddr12pair:$BD1)]> {
let mayStore = 1;
}
@@ -654,9 +665,9 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, Immediate imm> {
let DispKey = mnemonic in {
let DispSize = "12" in
- def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+ def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
let DispSize = "20" in
- def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+ def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm>;
}
}
@@ -719,8 +730,14 @@ class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
mnemonic#"r\t$R1, $R3, $R2", []> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
+ let R4 = 0;
}
+class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2, uimm8zx4:$R4),
+ mnemonic#"\t$R1, $R3, $R2, $R4", []>;
+
// These instructions are generated by if conversion. The old value of R1
// is added as an implicit use.
class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
@@ -729,6 +746,7 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
mnemonic#"r$R3\t$R1, $R2", []>,
Requires<[FeatureLoadStoreOnCond]> {
let CCMaskLast = 1;
+ let R4 = 0;
}
// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
@@ -740,6 +758,7 @@ class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
Requires<[FeatureLoadStoreOnCond]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
+ let R4 = 0;
}
// Like CondUnaryRRF, but with a fixed CC mask.
@@ -751,6 +770,7 @@ class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let R3 = ccmask;
+ let R4 = 0;
}
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
@@ -898,13 +918,16 @@ class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
[(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
+ let R4 = 0;
}
class BinaryRRFK<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3),
mnemonic#"rk\t$R1, $R2, $R3",
- [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]>;
+ [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> {
+ let R4 = 0;
+}
multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls1,
@@ -1285,6 +1308,22 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let DisableEncoding = "$R1src";
}
+class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
+ : InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2),
+ mnemonic##"\t$R1, $XBD2",
+ [(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>;
+
+class PrefetchRILPC<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator>
+ : InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2),
+ mnemonic##"\t$R1, $I2",
+ [(operator uimm8zx4:$R1, pcrel32:$I2)]> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
// A floating-point load-and test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
@@ -1310,6 +1349,100 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
let isCodeGenOnly = 1;
}
+// Like UnaryRI, but expanded after RA depending on the choice of register.
+class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins imm:$I2),
+ [(set cls:$R1, (operator imm:$I2))]>;
+
+// Like UnaryRXY, but expanded after RA depending on the choice of register.
+class UnaryRXYPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs cls:$R1), (ins mode:$XBD2),
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = key ## cls;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like UnaryRR, but expanded after RA depending on the choice of registers.
+class UnaryRRPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1), (ins cls2:$R2),
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = key ## cls1;
+ let OpType = "reg";
+}
+
+// Like BinaryRI, but expanded after RA depending on the choice of register.
+class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// Like BinaryRIE, but expanded after RA depending on the choice of register.
+class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2),
+ [(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
+
+// Like BinaryRIAndK, but expanded after RA depending on the choice of register.
+multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm> {
+ let NumOpsKey = key in {
+ let NumOpsValue = "3" in
+ def K : BinaryRIEPseudo<null_frag, cls, imm>,
+ Requires<[FeatureHighWord, FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRIPseudo<operator, cls, imm>,
+ Requires<[FeatureHighWord]>;
+ }
+}
+
+// Like CompareRI, but expanded after RA depending on the choice of register.
+class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]>;
+
+// Like CompareRXY, but expanded after RA depending on the choice of register.
+class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let mayLoad = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like StoreRXY, but expanded after RA depending on the choice of register.
+class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let mayStore = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like RotateSelectRIEf, but expanded after RA depending on the choice
+// of registers.
+class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5),
+ []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
// the value of the PSW's 2-bit condition code field.
class SelectWrapper<RegisterOperand cls>
@@ -1386,3 +1519,85 @@ class AtomicLoadWBinaryReg<SDPatternOperator operator>
: AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
+
+// Define an instruction that operates on two fixed-length blocks of memory,
+// and associated pseudo instructions for operating on blocks of any size.
+// The Sequence form uses a straight-line sequence of instructions and
+// the Loop form uses a loop of instructions that each process 256 bytes,
+// followed by another instruction to handle the excess.
+multiclass MemorySS<string mnemonic, bits<8> opcode,
+ SDPatternOperator sequence, SDPatternOperator loop> {
+ def "" : InstSS<opcode, (outs), (ins bdladdr12onlylen8:$BDL1,
+ bdaddr12only:$BD2),
+ mnemonic##"\t$BDL1, $BD2", []>;
+ let usesCustomInserter = 1 in {
+ def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(sequence bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length)]>;
+ def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256),
+ [(loop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256)]>;
+ }
+}
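// Illustrative sketch (not part of this patch; plain C++, splitBlockOp is a
// made-up helper): how an arbitrary-length block operation maps onto the
// Loop form described above, assuming each loop iteration handles one full
// 256-byte chunk and a single trailing instruction handles the remainder.
#include <cassert>
#include <cstdint>

static void splitBlockOp(uint64_t Length, uint64_t &Count256,
                         uint64_t &Residue) {
  Count256 = Length / 256;   // iterations of the 256-byte operation
  Residue = Length % 256;    // bytes left for the final instruction
}

int main() {
  uint64_t Count256, Residue;
  splitBlockOp(1000, Count256, Residue);
  assert(Count256 == 3 && Residue == 232);
  return 0;
}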
+
+// Define an instruction that operates on two strings, both terminated
+// by the character in R0. The instruction processes a CPU-determined
+// number of bytes at a time and sets CC to 3 if the instruction needs
+// to be repeated. Also define a pseudo instruction that represents
+// the full loop (the main instruction plus the branch on CC==3).
+multiclass StringRRE<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator> {
+ def "" : InstRRE<opcode, (outs GR64:$R1, GR64:$R2),
+ (ins GR64:$R1src, GR64:$R2src),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+ }
+ let usesCustomInserter = 1 in
+ def Loop : Pseudo<(outs GR64:$end),
+ (ins GR64:$start1, GR64:$start2, GR32:$char),
+ [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
+ GR32:$char))]>;
+}
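// Illustrative sketch (not part of this patch; plain C++, copyUntilTerminator
// is a made-up model): the shape of the Loop pseudo above.  Each execution of
// the underlying instruction copies some CPU-determined number of bytes and
// signals "not finished" via CC==3, so the pseudo is a branch-back loop
// around it; one byte per step stands in here for the hardware's chunking.
#include <cassert>
#include <cstring>

static char *copyUntilTerminator(char *Dest, const char *Src, char Term) {
  bool Done;
  do {
    *Dest = *Src;             // one "step" of the string instruction
    Done = (*Src == Term);    // stop once the terminator has been copied
    ++Dest;
    ++Src;
  } while (!Done);            // corresponds to branching back while CC==3
  return Dest - 1;            // address of the copied terminator
}

int main() {
  char Buf[8];
  char *End = copyUntilTerminator(Buf, "abc", '\0');
  assert(std::strcmp(Buf, "abc") == 0 && *End == '\0');
  return 0;
}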
+
+// A pseudo instruction that is a direct alias of a real instruction.
+// These aliases are used in cases where a particular register operand is
+// fixed or where the same instruction is used with different register sizes.
+// The size parameter is the size in bytes of the associated real instruction.
+class Alias<int size, dag outs, dag ins, list<dag> pattern>
+ : InstSystemZ<size, outs, ins, "", pattern> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+// An alias of a BinaryRI, but with different register sizes.
+class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// An alias of a BinaryRIL, but with different register sizes.
+class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// An alias of a CompareRI, but with different register sizes.
+class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
+
+// An alias of a RotateSelectRIEf, but with different register sizes.
+class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
+ : Alias<6, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> {
+ let Constraints = "$R1 = $R1src";
+}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 9ee60aa..acfeed8 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "SystemZGenInstrInfo.inc"
@@ -28,6 +28,18 @@ static uint64_t allOnes(unsigned int Count) {
return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
}
+// Reg should be a 32-bit GPR. Return true if it is a high register rather
+// than a low register.
+static bool isHighReg(unsigned int Reg) {
+ if (SystemZ::GRH32BitRegClass.contains(Reg))
+ return true;
+ assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32");
+ return false;
+}
+
+// Pin the vtable to this file.
+void SystemZInstrInfo::anchor() {}
+
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
: SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
RI(tm), TM(tm) {
@@ -48,8 +60,8 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
// Set up the two 64-bit registers.
MachineOperand &HighRegOp = EarlierMI->getOperand(0);
MachineOperand &LowRegOp = MI->getOperand(0);
- HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high));
- LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low));
+ HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
+ LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64));
// The address in the first (high) instruction is already correct.
// Adjust the offset in the second (low) instruction.
@@ -82,6 +94,97 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
OffsetMO.setImm(Offset);
}
+// MI is an RI-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32. ConvertHigh is true if LowOpcode takes a signed operand
+// and HighOpcode takes an unsigned 32-bit operand. In those cases,
+// MI has the same kind of operand as LowOpcode, so it needs to be converted
+// if HighOpcode is used.
+void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode,
+ bool ConvertHigh) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ bool IsHigh = isHighReg(Reg);
+ MI->setDesc(get(IsHigh ? HighOpcode : LowOpcode));
+ if (IsHigh && ConvertHigh)
+ MI->getOperand(1).setImm(uint32_t(MI->getOperand(1).getImm()));
+}
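// Illustrative sketch (not part of this patch, plain C++): the effect of the
// ConvertHigh path above.  The low opcode (e.g. LHI, as used for LHIMux later
// in this patch) carries a sign-extended immediate, while the high-register
// replacement (e.g. IIHF) expects a full unsigned 32-bit value, so the
// immediate is reinterpreted exactly as in expandRIPseudo.
#include <cassert>
#include <cstdint>

int main() {
  int64_t SignedImm = -1;                    // immediate as written for LHI
  int64_t Converted = uint32_t(SignedImm);   // same cast as in expandRIPseudo
  assert(Converted == 0xFFFFFFFF);           // value IIHF would then encode
  return 0;
}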
+
+// MI is a three-operand RIE-style pseudo instruction. Replace it with
+// LowOpcodeK if the registers are both low GR32s, otherwise use a move
+// followed by HighOpcode or LowOpcode, depending on whether the target
+// is a high or low GR32.
+void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned LowOpcodeK,
+ unsigned HighOpcode) const {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+ if (!DestIsHigh && !SrcIsHigh)
+ MI->setDesc(get(LowOpcodeK));
+ else {
+ emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
+ DestReg, SrcReg, SystemZ::LR, 32,
+ MI->getOperand(1).isKill());
+ MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
+ MI->getOperand(1).setReg(DestReg);
+ }
+}
+
+// MI is an RXY-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32.
+void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode,
+ MI->getOperand(2).getImm());
+ MI->setDesc(get(Opcode));
+}
+
+// MI is an RR-style pseudo instruction that zero-extends the low Size bits
+// of one GRX32 into another. Replace it with LowOpcode if both operands
+// are low registers, otherwise use RISB[LH]G.
+void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned Size) const {
+ emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
+ MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
+ LowOpcode, Size, MI->getOperand(1).isKill());
+ MI->eraseFromParent();
+}
+
+// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
+// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg
+// are low registers, otherwise use RISB[LH]G. Size is the number of bits
+// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
+// KillSrc is true if this move is the last use of SrcReg.
+void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, unsigned LowLowOpcode,
+ unsigned Size, bool KillSrc) const {
+ unsigned Opcode;
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+ if (DestIsHigh && SrcIsHigh)
+ Opcode = SystemZ::RISBHH;
+ else if (DestIsHigh && !SrcIsHigh)
+ Opcode = SystemZ::RISBHL;
+ else if (!DestIsHigh && SrcIsHigh)
+ Opcode = SystemZ::RISBLH;
+ else {
+ BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0);
+ BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ .addReg(DestReg, RegState::Undef)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
+}
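// Illustrative sketch (not part of this patch; plain C++, risbOperands is a
// made-up helper): the RISB operand values produced by the tail of
// emitGRX32Move.  The insert runs from bit 32-Size to bit 31, the +128 on the
// end operand requests that all other destination bits be zeroed, and the
// rotate is 32 whenever the move crosses the high/low register halves.
#include <cassert>

static void risbOperands(unsigned Size, bool CrossesHalves, unsigned &Start,
                         unsigned &End, unsigned &Rotate) {
  Start = 32 - Size;                 // first source bit inserted
  End = 128 + 31;                    // last bit is 31; +128 zeroes the rest
  Rotate = CrossesHalves ? 32 : 0;   // swap word halves when needed
}

int main() {
  unsigned Start, End, Rotate;
  // e.g. an 8-bit zero-extension from a high source into a low destination
  risbOperands(8, /*CrossesHalves=*/true, Start, End, Rotate);
  assert(Start == 24 && End == 159 && Rotate == 32);
  return 0;
}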
+
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
@@ -293,6 +396,103 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return Count;
}
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ assert(MI->isCompare() && "Caller should have checked for a comparison");
+
+ if (MI->getNumExplicitOperands() == 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isImm()) {
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ Value = MI->getOperand(1).getImm();
+ Mask = ~0;
+ return true;
+ }
+
+ return false;
+}
+
+// If Reg is a virtual register, return its definition, otherwise return null.
+static MachineInstr *getDef(unsigned Reg,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+ return MRI->getUniqueVRegDef(Reg);
+}
+
+// Return true if MI is a shift of type Opcode by Imm bits.
+static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) {
+ return (MI->getOpcode() == Opcode &&
+ !MI->getOperand(2).getReg() &&
+ MI->getOperand(3).getImm() == Imm);
+}
+
+// If the destination of MI has no uses, delete it as dead.
+static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
+ if (MRI->use_nodbg_empty(MI->getOperand(0).getReg()))
+ MI->eraseFromParent();
+}
+
+// Compare compares SrcReg against zero. Check whether SrcReg contains
+// the result of an IPM sequence whose input CC survives until Compare,
+// and whether Compare is therefore redundant. Delete it and return
+// true if so.
+static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
+ const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+ MachineInstr *LGFR = 0;
+ MachineInstr *RLL = getDef(SrcReg, MRI);
+ if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
+ LGFR = RLL;
+ RLL = getDef(LGFR->getOperand(1).getReg(), MRI);
+ }
+ if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
+ return false;
+
+ MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI);
+ if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC))
+ return false;
+
+ MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI);
+ if (!IPM || IPM->getOpcode() != SystemZ::IPM)
+ return false;
+
+  // Check that there are no assignments to CC between the IPM and Compare.
+ if (IPM->getParent() != Compare->getParent())
+ return false;
+ MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare;
+ for (++MBBI; MBBI != MBBE; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ if (MI->modifiesRegister(SystemZ::CC, TRI))
+ return false;
+ }
+
+ Compare->eraseFromParent();
+ if (LGFR)
+ eraseIfDead(LGFR, MRI);
+ eraseIfDead(RLL, MRI);
+ eraseIfDead(SRL, MRI);
+ eraseIfDead(IPM, MRI);
+
+ return true;
+}
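// Illustrative sketch (not part of this patch; plain C++, assuming IPM places
// CC at bit position SystemZ::IPM_CC of its result): the value that the
// matched IPM -> SRL -> RLL sequence leaves in SrcReg.  Its sign/zero state
// is a fixed function of CC, which is why a later compare of SrcReg against
// zero adds no information and can be deleted.
#include <cassert>
#include <cstdint>

static uint32_t ipmSequenceValue(uint32_t CC, unsigned IPMCC) {
  uint32_t IPM = CC << IPMCC;          // IPM: CC placed at bit IPMCC
  uint32_t SRL = IPM >> IPMCC;         // SRL ..., IPM_CC: CC now in bits 0-1
  return (SRL << 31) | (SRL >> 1);     // RLL ..., 31: rotate left by 31
}

int main() {
  const unsigned IPMCC = 28;                          // assumed IPM_CC value
  assert(ipmSequenceValue(0, IPMCC) == 0);            // CC==0 -> zero
  assert(int32_t(ipmSequenceValue(1, IPMCC)) < 0);    // CC==1 -> negative
  assert(int32_t(ipmSequenceValue(2, IPMCC)) > 0);    // CC==2 -> positive
  assert(int32_t(ipmSequenceValue(3, IPMCC)) < 0);    // CC==3 -> negative
  return 0;
}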
+
+bool
+SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const {
+ assert(!SrcReg2 && "Only optimizing constant comparisons so far");
+ bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
+ if (Value == 0 &&
+ !IsLogical &&
+ removeIPMBasedCompare(Compare, SrcReg, MRI, TM.getRegisterInfo()))
+ return true;
+ return false;
+}
+
// If Opcode is a move that has a conditional variant, return that variant,
// otherwise return 0.
static unsigned getConditionalMove(unsigned Opcode) {
@@ -356,18 +556,21 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too.
if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
- copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high),
- RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc);
- copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low),
- RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc);
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64),
+ RI.getSubReg(SrcReg, SystemZ::subreg_h64), KillSrc);
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_l64),
+ RI.getSubReg(SrcReg, SystemZ::subreg_l64), KillSrc);
+ return;
+ }
+
+ if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
+ emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc);
return;
}
// Everything else needs only one instruction.
unsigned Opcode;
- if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg))
- Opcode = SystemZ::LR;
- else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
+ if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LGR;
else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LER;
@@ -438,15 +641,15 @@ namespace {
static LogicOp interpretAndImmediate(unsigned Opcode) {
switch (Opcode) {
- case SystemZ::NILL32: return LogicOp(32, 0, 16);
- case SystemZ::NILH32: return LogicOp(32, 16, 16);
- case SystemZ::NILL: return LogicOp(64, 0, 16);
- case SystemZ::NILH: return LogicOp(64, 16, 16);
- case SystemZ::NIHL: return LogicOp(64, 32, 16);
- case SystemZ::NIHH: return LogicOp(64, 48, 16);
- case SystemZ::NILF32: return LogicOp(32, 0, 32);
- case SystemZ::NILF: return LogicOp(64, 0, 32);
- case SystemZ::NIHF: return LogicOp(64, 32, 32);
+ case SystemZ::NILMux: return LogicOp(32, 0, 16);
+ case SystemZ::NIHMux: return LogicOp(32, 16, 16);
+ case SystemZ::NILL64: return LogicOp(64, 0, 16);
+ case SystemZ::NILH64: return LogicOp(64, 16, 16);
+ case SystemZ::NIHL64: return LogicOp(64, 32, 16);
+ case SystemZ::NIHH64: return LogicOp(64, 48, 16);
+ case SystemZ::NIFMux: return LogicOp(32, 0, 32);
+ case SystemZ::NILF64: return LogicOp(64, 0, 32);
+ case SystemZ::NIHF64: return LogicOp(64, 32, 32);
default: return LogicOp();
}
}
@@ -473,6 +676,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LiveVariables *LV) const {
MachineInstr *MI = MBBI;
MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
unsigned Opcode = MI->getOpcode();
unsigned NumOps = MI->getNumOperands();
@@ -482,10 +686,23 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// because it tends to be shorter and because some instructions
// have memory forms that can be used during spilling.
if (TM.getSubtargetImpl()->hasDistinctOps()) {
+ MachineOperand &Dest = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DestReg = Dest.getReg();
+ unsigned SrcReg = Src.getReg();
+ // AHIMux is only really a three-operand instruction when both operands
+ // are low registers. Try to constrain both operands to be low if
+ // possible.
+ if (Opcode == SystemZ::AHIMux &&
+ TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
+ MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
+ MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
+ MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
+ }
int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
if (ThreeOperandOpcode >= 0) {
- MachineOperand &Dest = MI->getOperand(0);
- MachineOperand &Src = MI->getOperand(1);
MachineInstrBuilder MIB =
BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode))
.addOperand(Dest);
@@ -500,34 +717,27 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// Try to convert an AND into an RISBG-type instruction.
if (LogicOp And = interpretAndImmediate(Opcode)) {
- unsigned NewOpcode;
- if (And.RegSize == 64)
- NewOpcode = SystemZ::RISBG;
- else if (TM.getSubtargetImpl()->hasHighWord())
- NewOpcode = SystemZ::RISBLG32;
- else
- // We can't use RISBG for 32-bit operations because it clobbers the
- // high word of the destination too.
- NewOpcode = 0;
- if (NewOpcode) {
- uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB;
- // AND IMMEDIATE leaves the other bits of the register unchanged.
- Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
- unsigned Start, End;
- if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
- if (NewOpcode == SystemZ::RISBLG32) {
- Start &= 31;
- End &= 31;
- }
- MachineOperand &Dest = MI->getOperand(0);
- MachineOperand &Src = MI->getOperand(1);
- MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode))
- .addOperand(Dest).addReg(0)
- .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg())
- .addImm(Start).addImm(End + 128).addImm(0);
- return finishConvertToThreeAddress(MI, MIB, LV);
+ uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB;
+ // AND IMMEDIATE leaves the other bits of the register unchanged.
+ Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
+ unsigned Start, End;
+ if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
+ unsigned NewOpcode;
+ if (And.RegSize == 64)
+ NewOpcode = SystemZ::RISBG;
+ else {
+ NewOpcode = SystemZ::RISBMux;
+ Start &= 31;
+ End &= 31;
}
+ MachineOperand &Dest = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode))
+ .addOperand(Dest).addReg(0)
+ .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg())
+ .addImm(Start).addImm(End + 128).addImm(0);
+ return finishConvertToThreeAddress(MI, MIB, LV);
}
}
return 0;
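// Illustrative sketch (not part of this patch, plain C++): the Imm
// computation above, worked for a hypothetical AND of the low 16 bits with
// 0x00ff (And = {RegSize=32, ImmLSB=0, ImmSize=16}).  AND IMMEDIATE leaves
// bits outside the immediate field untouched, so they are filled with ones
// before the mask check; the result is the kind of (possibly wrapped) run of
// ones that isRxSBGMask tests for.
#include <cassert>
#include <cstdint>

static uint64_t allOnes(unsigned Count) {   // same helper as in this file
  return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
}

int main() {
  unsigned RegSize = 32, ImmLSB = 0, ImmSize = 16;
  uint64_t Imm = uint64_t(0x00ff) << ImmLSB;
  Imm |= allOnes(RegSize) & ~(allOnes(ImmSize) << ImmLSB);
  assert(Imm == 0xffff00ff);   // ones wrap around the zeroed bits 8-15
  return 0;
}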
@@ -540,8 +750,21 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
int FrameIndex) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
+ unsigned Opcode = MI->getOpcode();
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ if ((Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
+ isInt<8>(MI->getOperand(2).getImm()) &&
+ !MI->getOperand(3).getReg()) {
+ // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
+ return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::AGSI))
+ .addFrameIndex(FrameIndex).addImm(0)
+ .addImm(MI->getOperand(2).getImm());
+ }
+ return 0;
+ }
- // Eary exit for cases we don't care about
+ // All other cases require a single operand.
if (Ops.size() != 1)
return 0;
@@ -550,7 +773,16 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
.getRegClass(MI->getOperand(OpNum).getReg())->getSize() &&
"Invalid size combination");
- unsigned Opcode = MI->getOpcode();
+ if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) &&
+ OpNum == 0 &&
+ isInt<8>(MI->getOperand(2).getImm())) {
+ // A(G)HI %reg, CONST -> A(G)SI %mem, CONST
+ Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI);
+ return BuildMI(MF, MI->getDebugLoc(), get(Opcode))
+ .addFrameIndex(FrameIndex).addImm(0)
+ .addImm(MI->getOperand(2).getImm());
+ }
+
if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
bool Op0IsGPR = (Opcode == SystemZ::LGDR);
bool Op1IsGPR = (Opcode == SystemZ::LDGR);
@@ -577,10 +809,14 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
//
// Although MVC is in practice a fast choice in these cases, it is still
// logically a bytewise copy. This means that we cannot use it if the
- // load or store is volatile. It also means that the transformation is
- // not valid in cases where the two memories partially overlap; however,
- // that is not a problem here, because we know that one of the memories
- // is a full frame index.
+ // load or store is volatile. We also wouldn't be able to use MVC if
+ // the two memories partially overlap, but that case cannot occur here,
+ // because we know that one of the memories is a full frame index.
+ //
+ // For performance reasons, we also want to avoid using MVC if the addresses
+ // might be equal. We don't worry about that case here, because spill slot
+ // coloring happens later, and because we have special code to remove
+ // MVCs that turn out to be redundant.
if (OpNum == 0 && MI->hasOneMemOperand()) {
MachineMemOperand *MMO = *MI->memoperands_begin();
if (MMO->getSize() == Size && !MMO->isVolatile()) {
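// Illustrative sketch (not part of this patch, plain C++): why a logically
// bytewise copy such as MVC is unsafe on partially overlapping memory.  A
// forward byte-by-byte copy smears the destination, which is why the comment
// above relies on one operand being a full, distinct frame index.
#include <cassert>
#include <cstring>

int main() {
  char Buf[8] = {'a', 'b', 'c', 'd', '\0'};
  for (int I = 0; I < 3; ++I)   // copy 3 bytes from Buf to Buf+1 (overlap)
    Buf[I + 1] = Buf[I];
  assert(std::strcmp(Buf, "aaaa") == 0);  // not the "abc" a true move gives
  return 0;
}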
@@ -651,6 +887,138 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
splitMove(MI, SystemZ::STD);
return true;
+ case SystemZ::LBMux:
+ expandRXYPseudo(MI, SystemZ::LB, SystemZ::LBH);
+ return true;
+
+ case SystemZ::LHMux:
+ expandRXYPseudo(MI, SystemZ::LH, SystemZ::LHH);
+ return true;
+
+ case SystemZ::LLCRMux:
+ expandZExtPseudo(MI, SystemZ::LLCR, 8);
+ return true;
+
+ case SystemZ::LLHRMux:
+ expandZExtPseudo(MI, SystemZ::LLHR, 16);
+ return true;
+
+ case SystemZ::LLCMux:
+ expandRXYPseudo(MI, SystemZ::LLC, SystemZ::LLCH);
+ return true;
+
+ case SystemZ::LLHMux:
+ expandRXYPseudo(MI, SystemZ::LLH, SystemZ::LLHH);
+ return true;
+
+ case SystemZ::LMux:
+ expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH);
+ return true;
+
+ case SystemZ::STCMux:
+ expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
+ return true;
+
+ case SystemZ::STHMux:
+ expandRXYPseudo(MI, SystemZ::STH, SystemZ::STHH);
+ return true;
+
+ case SystemZ::STMux:
+ expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH);
+ return true;
+
+ case SystemZ::LHIMux:
+ expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true);
+ return true;
+
+ case SystemZ::IIFMux:
+ expandRIPseudo(MI, SystemZ::IILF, SystemZ::IIHF, false);
+ return true;
+
+ case SystemZ::IILMux:
+ expandRIPseudo(MI, SystemZ::IILL, SystemZ::IIHL, false);
+ return true;
+
+ case SystemZ::IIHMux:
+ expandRIPseudo(MI, SystemZ::IILH, SystemZ::IIHH, false);
+ return true;
+
+ case SystemZ::NIFMux:
+ expandRIPseudo(MI, SystemZ::NILF, SystemZ::NIHF, false);
+ return true;
+
+ case SystemZ::NILMux:
+ expandRIPseudo(MI, SystemZ::NILL, SystemZ::NIHL, false);
+ return true;
+
+ case SystemZ::NIHMux:
+ expandRIPseudo(MI, SystemZ::NILH, SystemZ::NIHH, false);
+ return true;
+
+ case SystemZ::OIFMux:
+ expandRIPseudo(MI, SystemZ::OILF, SystemZ::OIHF, false);
+ return true;
+
+ case SystemZ::OILMux:
+ expandRIPseudo(MI, SystemZ::OILL, SystemZ::OIHL, false);
+ return true;
+
+ case SystemZ::OIHMux:
+ expandRIPseudo(MI, SystemZ::OILH, SystemZ::OIHH, false);
+ return true;
+
+ case SystemZ::XIFMux:
+ expandRIPseudo(MI, SystemZ::XILF, SystemZ::XIHF, false);
+ return true;
+
+ case SystemZ::TMLMux:
+ expandRIPseudo(MI, SystemZ::TMLL, SystemZ::TMHL, false);
+ return true;
+
+ case SystemZ::TMHMux:
+ expandRIPseudo(MI, SystemZ::TMLH, SystemZ::TMHH, false);
+ return true;
+
+ case SystemZ::AHIMux:
+ expandRIPseudo(MI, SystemZ::AHI, SystemZ::AIH, false);
+ return true;
+
+ case SystemZ::AHIMuxK:
+ expandRIEPseudo(MI, SystemZ::AHI, SystemZ::AHIK, SystemZ::AIH);
+ return true;
+
+ case SystemZ::AFIMux:
+ expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false);
+ return true;
+
+ case SystemZ::CFIMux:
+ expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false);
+ return true;
+
+ case SystemZ::CLFIMux:
+ expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false);
+ return true;
+
+ case SystemZ::CMux:
+ expandRXYPseudo(MI, SystemZ::C, SystemZ::CHF);
+ return true;
+
+ case SystemZ::CLMux:
+ expandRXYPseudo(MI, SystemZ::CL, SystemZ::CLHF);
+ return true;
+
+ case SystemZ::RISBMux: {
+ bool DestIsHigh = isHighReg(MI->getOperand(0).getReg());
+ bool SrcIsHigh = isHighReg(MI->getOperand(2).getReg());
+ if (SrcIsHigh == DestIsHigh)
+ MI->setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
+ else {
+ MI->setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH));
+ MI->getOperand(5).setImm(MI->getOperand(5).getImm() ^ 32);
+ }
+ return true;
+ }
+
case SystemZ::ADJDYNALLOC:
splitAdjDynAlloc(MI);
return true;
@@ -697,11 +1065,21 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const {
return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP,
MI->getOperand(2).getImm(), &MI->getOperand(3));
+ case SystemZ::CLIJ:
+ case SystemZ::CLRJ:
+ return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP,
+ MI->getOperand(2).getImm(), &MI->getOperand(3));
+
case SystemZ::CGIJ:
case SystemZ::CGRJ:
return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP,
MI->getOperand(2).getImm(), &MI->getOperand(3));
+ case SystemZ::CLGIJ:
+ case SystemZ::CLGRJ:
+ return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP,
+ MI->getOperand(2).getImm(), &MI->getOperand(3));
+
default:
llvm_unreachable("Unrecognized branch opcode");
}
@@ -712,7 +1090,13 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
unsigned &StoreOpcode) const {
if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) {
LoadOpcode = SystemZ::L;
- StoreOpcode = SystemZ::ST32;
+ StoreOpcode = SystemZ::ST;
+ } else if (RC == &SystemZ::GRH32BitRegClass) {
+ LoadOpcode = SystemZ::LFH;
+ StoreOpcode = SystemZ::STFH;
+ } else if (RC == &SystemZ::GRX32BitRegClass) {
+ LoadOpcode = SystemZ::LMux;
+ StoreOpcode = SystemZ::STMux;
} else if (RC == &SystemZ::GR64BitRegClass ||
RC == &SystemZ::ADDR64BitRegClass) {
LoadOpcode = SystemZ::LG;
@@ -830,6 +1214,14 @@ unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode,
return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CIJ : 0;
case SystemZ::CGHI:
return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0;
+ case SystemZ::CLR:
+ return SystemZ::CLRJ;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRJ;
+ case SystemZ::CLFI:
+ return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0;
+ case SystemZ::CLGFI:
+ return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLGIJ : 0;
default:
return 0;
}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 276fd3b..be4c8fe 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -70,10 +70,18 @@ namespace SystemZII {
// on the result.
BranchC,
+  // An instruction that performs a 32-bit unsigned comparison and branches
+ // on the result.
+ BranchCL,
+
// An instruction that peforms a 64-bit signed comparison and branches
// on the result.
BranchCG,
+  // An instruction that performs a 64-bit unsigned comparison and branches
+ // on the result.
+ BranchCLG,
+
// An instruction that decrements a 32-bit register and branches if
// the result is nonzero.
BranchCT,
@@ -108,7 +116,19 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
-
+ void expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode, bool ConvertHigh) const;
+ void expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned LowOpcodeK, unsigned HighOpcode) const;
+ void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
+ void expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned Size) const;
+ void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
+ virtual void anchor();
+
public:
explicit SystemZInstrInfo(SystemZTargetMachine &TM);
@@ -129,6 +149,12 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const LLVM_OVERRIDE;
+ bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &Mask, int &Value) const
+ LLVM_OVERRIDE;
+ bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int Mask, int Value,
+ const MachineRegisterInfo *MRI) const LLVM_OVERRIDE;
virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE;
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index b318d67..6524e44 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -32,12 +32,9 @@ let neverHasSideEffects = 1 in {
// Control flow instructions
//===----------------------------------------------------------------------===//
-// A return instruction. R1 is the condition-code mask (all 1s)
-// and R2 is the target address, which is always stored in %r14.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1,
- R1 = 15, R2 = 14, isCodeGenOnly = 1 in {
- def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>;
-}
+// A return instruction (br %r14).
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+ def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
// Unconditional branches. R1 is the condition-code mask (all 1s).
let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
@@ -70,6 +67,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
"brc\t$R1, $I2", []>;
def AsmBRCL : InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2),
"brcl\t$R1, $I2", []>;
+ def AsmBCR : InstRR<0x07, (outs), (ins uimm8zx4:$R1, GR64:$R2),
+ "bcr\t$R1, $R2", []>;
}
// Fused compare-and-branch instructions. As for normal branches,
@@ -94,6 +93,18 @@ multiclass CompareBranches<Operand ccmask, string pos1, string pos2> {
def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
brtarget16:$RI4),
"cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
}
}
let isCodeGenOnly = 1 in
@@ -108,6 +119,7 @@ multiclass CondExtendedMnemonic<bits<4> ccmask, string name> {
"j"##name##"\t$I2", []>;
def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2),
"jg"##name##"\t$I2", []>;
+ def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), "b"##name##"r\t$R2", []>;
}
def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>;
def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>;
@@ -152,6 +164,18 @@ multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> {
def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2,
brtarget16:$RI4),
"cgij"##name##"\t$R1, $I2, $RI4", []>;
+ def CLR : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2,
+ brtarget16:$RI4),
+ "clrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2,
+ brtarget16:$RI4),
+ "clgrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CLI : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2,
+ brtarget16:$RI4),
+ "clij"##name##"\t$R1, $I2, $RI4", []>;
+ def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2,
+ brtarget16:$RI4),
+ "clgij"##name##"\t$R1, $I2, $RI4", []>;
}
}
multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
@@ -177,22 +201,31 @@ let Defs = [CC] in {
// Select instructions
//===----------------------------------------------------------------------===//
-def Select32 : SelectWrapper<GR32>;
-def Select64 : SelectWrapper<GR64>;
-
-defm CondStore8_32 : CondStores<GR32, nonvolatile_truncstorei8,
+def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
+def Select32 : SelectWrapper<GR32>;
+def Select64 : SelectWrapper<GR64>;
+
+// We don't define 32-bit Mux stores because the low-only STOC should
+// always be used if possible.
+defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
-defm CondStore16_32 : CondStores<GR32, nonvolatile_truncstorei16,
+defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>;
-defm CondStore32_32 : CondStores<GR32, nonvolatile_store,
+defm CondStore32 : CondStores<GR32, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
-defm CondStore8 : CondStores<GR64, nonvolatile_truncstorei8,
- nonvolatile_anyextloadi8, bdxaddr20only>;
-defm CondStore16 : CondStores<GR64, nonvolatile_truncstorei16,
- nonvolatile_anyextloadi16, bdxaddr20only>;
-defm CondStore32 : CondStores<GR64, nonvolatile_truncstorei32,
- nonvolatile_anyextloadi32, bdxaddr20only>;
+defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32,
+ nonvolatile_anyextloadi32, bdxaddr20only>;
defm CondStore64 : CondStores<GR64, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
@@ -202,24 +235,30 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
// The definitions here are for the call-clobbered registers.
let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
- F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC],
- R1 = 14, isCodeGenOnly = 1 in {
- def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops),
- "bras\t%r14, $I2", []>;
- def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$I2, variable_ops),
- "brasl\t%r14, $I2", [(z_call pcrel32call:$I2)]>;
- def BASR : InstRR<0x0D, (outs), (ins ADDR64:$R2, variable_ops),
- "basr\t%r14, $R2", [(z_call ADDR64:$R2)]>;
+ F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC] in {
+ def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
+ [(z_call pcrel32:$I2)]>;
+ def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops),
+ [(z_call ADDR64:$R2)]>;
+}
+
+// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards
+// are argument registers and since branching to R0 is a no-op.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def CallJG : Alias<6, (outs), (ins pcrel32:$I2),
+ [(z_sibcall pcrel32:$I2)]>;
+ let Uses = [R1D] in
+ def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
}
// Define the general form of the call instructions for the asm parser.
// These instructions don't hard-code %r14 as the return address register.
-def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
- "bras\t$R1, $I2", []>;
-def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2),
- "brasl\t$R1, $I2", []>;
-def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
- "basr\t$R1, $R2", []>;
+def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
+ "bras\t$R1, $I2", []>;
+def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2),
+ "brasl\t$R1, $I2", []>;
+def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
+ "basr\t$R1, $R2", []>;
//===----------------------------------------------------------------------===//
// Move instructions
@@ -227,6 +266,9 @@ def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
// Register moves.
let neverHasSideEffects = 1 in {
+ // Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
+ def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>;
def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>;
}
@@ -248,7 +290,10 @@ let Uses = [CC] in {
// Immediate moves.
let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
isReMaterializable = 1 in {
- // 16-bit sign-extended immediates.
+ // 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF,
+  // depending on the choice of register.
+ def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>,
+ Requires<[FeatureHighWord]>;
def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>;
def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>;
@@ -266,7 +311,12 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
// Register loads.
let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+ // Expands to L, LY or LFH, depending on the choice of register.
+ def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
+ Requires<[FeatureHighWord]>;
defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>;
+ def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>,
+ Requires<[FeatureHighWord]>;
def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>;
// These instructions are split after register allocation, so we don't
@@ -298,8 +348,12 @@ let Uses = [CC] in {
// Register stores.
let SimpleBDXStore = 1 in {
- let isCodeGenOnly = 1 in
- defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
+ // Expands to ST, STY or STFH, depending on the choice of register.
+ def STMux : StoreRXYPseudo<store, GRX32, 4>,
+ Requires<[FeatureHighWord]>;
+ defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
+ def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>,
+ Requires<[FeatureHighWord]>;
def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>;
// These instructions are split after register allocation, so we don't
@@ -309,15 +363,13 @@ let SimpleBDXStore = 1 in {
[(store GR128:$src, bdxaddr20only128:$dst)]>;
}
}
-let isCodeGenOnly = 1 in
- def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
+def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
// Store on condition.
let isCodeGenOnly = 1, Uses = [CC] in {
- def STOC32 : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
- def STOC : CondStoreRSY<"stoc", 0xEBF3, GR64, 4>;
- def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>;
+ def STOC : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
+ def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>;
}
let Uses = [CC] in {
def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
@@ -334,33 +386,22 @@ def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>;
// Memory-to-memory moves.
let mayLoad = 1, mayStore = 1 in
- def MVC : InstSS<0xD2, (outs), (ins bdladdr12onlylen8:$BDL1,
- bdaddr12only:$BD2),
- "mvc\t$BDL1, $BD2", []>;
-
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in
- def MVCWrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm32len8:$length),
- [(z_mvc bdaddr12only:$dest, bdaddr12only:$src,
- imm32len8:$length)]>;
-
-defm LoadStore8_32 : MVCLoadStore<anyextloadi8, truncstorei8, i32,
- MVCWrapper, 1>;
-defm LoadStore16_32 : MVCLoadStore<anyextloadi16, truncstorei16, i32,
- MVCWrapper, 2>;
-defm LoadStore32_32 : MVCLoadStore<load, store, i32, MVCWrapper, 4>;
-
-defm LoadStore8 : MVCLoadStore<anyextloadi8, truncstorei8, i64,
- MVCWrapper, 1>;
-defm LoadStore16 : MVCLoadStore<anyextloadi16, truncstorei16, i64,
- MVCWrapper, 2>;
-defm LoadStore32 : MVCLoadStore<anyextloadi32, truncstorei32, i64,
- MVCWrapper, 4>;
-defm LoadStore64 : MVCLoadStore<load, store, i64, MVCWrapper, 8>;
+ defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
+
+// String moves.
+let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in
+ defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
//===----------------------------------------------------------------------===//
// Sign extensions
//===----------------------------------------------------------------------===//
+//
+// Note that putting these before zero extensions means that we will prefer
+// them for anyextload*. There's not really much to choose between the two
+// either way, but sign-extending loads have a short LH and a long LHY,
+// while zero-extending loads have only the long LLH.
+//
+//===----------------------------------------------------------------------===//
// 32-bit extensions from registers.
let neverHasSideEffects = 1 in {
@@ -380,40 +421,33 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
// Match 32-to-64-bit sign extensions in which the source is already
// in a 64-bit register.
def : Pat<(sext_inreg GR64:$src, i32),
- (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
-
-// 32-bit extensions from memory.
-def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32, 1>;
-defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32, 2>;
-def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>;
+ (LGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
+
+// 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH,
+// depending on the choice of register.
+def LBMux : UnaryRXYPseudo<"lb", asextloadi8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>;
+def LBH : UnaryRXY<"lbh", 0xE3C0, asextloadi8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH,
+// depending on the choice of register.
+def LHMux : UnaryRXYPseudo<"lh", asextloadi16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>;
+def LHH : UnaryRXY<"lhh", 0xE3C4, asextloadi16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>;
// 64-bit extensions from memory.
-def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64, 1>;
-def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64, 2>;
-def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64, 4>;
-def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>;
-def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>;
+def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>;
+def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>;
+def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>;
+def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>;
+def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>;
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LTGF : UnaryRXY<"ltgf", 0xE332, sextloadi32, GR64, 4>;
-
-// If the sign of a load-extend operation doesn't matter, use the signed ones.
-// There's not really much to choose between the sign and zero extensions,
-// but LH is more compact than LLH for small offsets.
-def : Pat<(i32 (extloadi8 bdxaddr20only:$src)), (LB bdxaddr20only:$src)>;
-def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH bdxaddr12pair:$src)>;
-def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>;
-
-def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>;
-def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>;
-def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>;
-
-// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
-// However, BDXs have two extra operands and are therefore 6 units more
-// complex.
-let AddedComplexity = 7 in {
- def : Pat<(i32 (extloadi16 pcrel32:$src)), (LHRL pcrel32:$src)>;
- def : Pat<(i64 (extloadi16 pcrel32:$src)), (LGHRL pcrel32:$src)>;
-}
+ def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>;
//===----------------------------------------------------------------------===//
// Zero extensions
@@ -421,8 +455,14 @@ let AddedComplexity = 7 in {
// 32-bit extensions from registers.
let neverHasSideEffects = 1 in {
- def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>;
- def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>;
+ // Expands to LLCR or RISB[LH]G, depending on the choice of registers.
+ def LLCRMux : UnaryRRPseudo<"llc", zext8, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>;
+ // Expands to LLHR or RISB[LH]G, depending on the choice of registers.
+ def LLHRMux : UnaryRRPseudo<"llh", zext16, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>;
}
// 64-bit extensions from registers.
@@ -435,19 +475,31 @@ let neverHasSideEffects = 1 in {
// Match 32-to-64-bit zero extensions in which the source is already
// in a 64-bit register.
def : Pat<(and GR64:$src, 0xffffffff),
- (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+ (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
-// 32-bit extensions from memory.
-def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32, 1>;
-def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32, 2>;
-def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>;
+// 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH,
+// depending on the choice of register.
+def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>;
+def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH,
+// depending on the choice of register.
+def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>;
+def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GR32, 2>,
+ Requires<[FeatureHighWord]>;
+def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>;
// 64-bit extensions from memory.
-def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64, 1>;
-def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64, 2>;
-def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64, 4>;
-def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>;
-def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>;
+def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>;
+def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>;
+def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>;
+def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>;
+def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>;
//===----------------------------------------------------------------------===//
// Truncations
@@ -455,21 +507,31 @@ def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>;
// Truncations of 64-bit registers to 32-bit registers.
def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+ (EXTRACT_SUBREG GR64:$src, subreg_l32)>;
-// Truncations of 32-bit registers to memory.
-let isCodeGenOnly = 1 in {
- defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>;
- defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>;
- def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
-}
+// Truncations of 32-bit registers to 8-bit memory. STCMux expands to
+// STC, STCY or STCH, depending on the choice of register.
+def STCMux : StoreRXYPseudo<truncstorei8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>;
+def STCH : StoreRXY<"stch", 0xE3C3, truncstorei8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// Truncations of 32-bit registers to 16-bit memory. STHMux expands to
+// STH, STHY or STHH, depending on the choice of register.
+def STHMux : StoreRXYPseudo<truncstorei16, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>;
+def STHH : StoreRXY<"sthh", 0xE3C7, truncstorei16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
// Truncations of 64-bit registers to memory.
-defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64, 1>;
-defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64, 2>;
-def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>;
-defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64, 4>;
-def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>;
+defm : StoreGR64Pair<STC, STCY, truncstorei8>;
+defm : StoreGR64Pair<STH, STHY, truncstorei16>;
+def : StoreGR64PC<STHRL, aligned_truncstorei16>;
+defm : StoreGR64Pair<ST, STY, truncstorei32>;
+def : StoreGR64PC<STRL, aligned_truncstorei32>;
//===----------------------------------------------------------------------===//
// Multi-register moves
@@ -528,11 +590,31 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
}
//===----------------------------------------------------------------------===//
-// Negation
+// Absolute and Negation
//===----------------------------------------------------------------------===//
let Defs = [CC] in {
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LPR : UnaryRR <"lp", 0x10, z_iabs32, GR32, GR32>;
+ def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs64, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LPGFR : UnaryRRE<"lpgf", 0xB910, null_frag, GR64, GR32>;
+}
+defm : SXU<z_iabs64, LPGFR>;
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LNR : UnaryRR <"ln", 0x11, z_inegabs32, GR32, GR32>;
+ def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs64, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LNGFR : UnaryRRE<"lngf", 0xB911, null_frag, GR64, GR32>;
+}
+defm : SXU<z_inegabs64, LNGFR>;
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
}
@@ -546,43 +628,51 @@ defm : SXU<ineg, LCGFR>;
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1 in
- defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8, 1>;
-defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8, 1>;
+ defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>;
+defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>;
-defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>;
-defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>;
+defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>;
-defm : InsertMem<"inserti8", IC, GR64, zextloadi8, bdxaddr12pair>;
-defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>;
+defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>;
// Insertions of a 16-bit immediate, leaving other bits unaffected.
// We don't have or_as_insert equivalents of these operations because
// OI is available instead.
-let isCodeGenOnly = 1 in {
- def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
- def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
-}
-def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>;
-def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>;
-def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>;
-def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>;
+//
+// IIxMux expands to II[LH]x, depending on the choice of register.
+def IILMux : BinaryRIPseudo<insertll, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+def IIHMux : BinaryRIPseudo<insertlh, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+def IILL : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
+def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
+def IIHL : BinaryRI<"iihl", 0xA51, insertll, GRH32, imm32ll16>;
+def IIHH : BinaryRI<"iihh", 0xA50, insertlh, GRH32, imm32lh16>;
+def IILL64 : BinaryAliasRI<insertll, GR64, imm64ll16>;
+def IILH64 : BinaryAliasRI<insertlh, GR64, imm64lh16>;
+def IIHL64 : BinaryAliasRI<inserthl, GR64, imm64hl16>;
+def IIHH64 : BinaryAliasRI<inserthh, GR64, imm64hh16>;
// ...likewise for 32-bit immediates. For GR32s this is a general
// full-width move. (We use IILF rather than something like LLILF
// for 32-bit moves because IILF leaves the upper 32 bits of the
// GR64 unchanged.)
-let isCodeGenOnly = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
- isReMaterializable = 1 in {
- def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in {
+ def IIFMux : UnaryRIPseudo<bitconvert, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def IILF : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+ def IIHF : UnaryRIL<"iihf", 0xC08, bitconvert, GRH32, uimm32>;
}
-def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>;
-def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>;
+def IILF64 : BinaryAliasRIL<insertlf, GR64, imm64lf32>;
+def IIHF64 : BinaryAliasRIL<inserthf, GR64, imm64hf32>;
// An alternative model of inserthf, with the first operand being
// a zero-extended value.
def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
- (IIHF (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit),
- imm64hf32:$imm)>;
+ (IIHF64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ imm64hf32:$imm)>;
//===----------------------------------------------------------------------===//
// Addition
@@ -598,17 +688,22 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>;
// Addition of signed 16-bit immediates.
+ defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>;
// Addition of signed 32-bit immediates.
+ def AFIMux : BinaryRIPseudo<add, GRX32, simm32>,
+ Requires<[FeatureHighWord]>;
def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>;
+ def AIH : BinaryRIL<"aih", 0xCC8, add, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>;
// Addition of memory.
- defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16, 2>;
+ defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>;
defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
- def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32, 4>;
+ def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>;
def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>;
// Addition to memory.
@@ -638,7 +733,7 @@ let Defs = [CC] in {
// Addition of memory.
defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>;
- def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32, 4>;
+ def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>;
def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>;
}
defm : ZXB<addc, GR64, ALGFR>;
@@ -667,9 +762,9 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>;
// Subtraction of memory.
- defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16, 2>;
+ defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>;
- def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32, 4>;
+ def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>;
def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>;
}
defm : SXB<sub, GR64, SGFR>;
@@ -688,7 +783,7 @@ let Defs = [CC] in {
// Subtraction of memory.
defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>;
- def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32, 4>;
+ def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, azextloadi32, 4>;
def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>;
}
defm : ZXB<subc, GR64, SLGFR>;
@@ -718,22 +813,33 @@ let Defs = [CC] in {
let isConvertibleToThreeAddress = 1 in {
// ANDs of a 16-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 16-bit field, not the full register.
- let isCodeGenOnly = 1 in {
- def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
- def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
- }
- def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>;
- def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>;
- def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>;
- def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
+ //
+ // NIxMux expands to NI[LH]x, depending on the choice of register.
+ def NILMux : BinaryRIPseudo<and, GRX32, imm32ll16c>,
+ Requires<[FeatureHighWord]>;
+ def NIHMux : BinaryRIPseudo<and, GRX32, imm32lh16c>,
+ Requires<[FeatureHighWord]>;
+ def NILL : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
+ def NILH : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
+ def NIHL : BinaryRI<"nihl", 0xA55, and, GRH32, imm32ll16c>;
+ def NIHH : BinaryRI<"nihh", 0xA54, and, GRH32, imm32lh16c>;
+ def NILL64 : BinaryAliasRI<and, GR64, imm64ll16c>;
+ def NILH64 : BinaryAliasRI<and, GR64, imm64lh16c>;
+ def NIHL64 : BinaryAliasRI<and, GR64, imm64hl16c>;
+ def NIHH64 : BinaryAliasRI<and, GR64, imm64hh16c>;
// ANDs of a 32-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
- let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
- def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
- def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
- def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to NILF or NIHF, depending on the choice of register.
+ def NIFMux : BinaryRIPseudo<and, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def NILF : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
+ def NIHF : BinaryRIL<"nihf", 0xC0A, and, GRH32, uimm32>;
+ }
+ def NILF64 : BinaryAliasRIL<and, GR64, imm64lf32c>;
+ def NIHF64 : BinaryAliasRIL<and, GR64, imm64hf32c>;
}
// ANDs of memory.
@@ -744,6 +850,10 @@ let Defs = [CC] in {
// AND to memory
defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
+
+ // Block AND.
+ let mayLoad = 1, mayStore = 1 in
+ defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>;
}
defm : RMWIByte<and, bdaddr12pair, NI>;
defm : RMWIByte<and, bdaddr20pair, NIY>;
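The NILMux/NIHMux/NIFMux definitions above follow a convention used throughout this patch: instead of isCodeGenOnly GR32/GR64 twins, a single pseudo takes a GRX32 operand (either 32-bit half of a GPR) and, as the comments say, expands to the low-register or high-register instruction depending on which half the register allocator picked. A minimal standalone sketch of that decision, with invented opcode tags and an assumed isHighReg() helper rather than the backend's real interface:

#include <cassert>
#include <cstdio>

// Hypothetical stand-ins for the two real instructions the pseudo can become.
enum class Opcode { NILF, NIHF };

// Assumption for the sketch: once a physical register has been chosen, we can
// tell whether it is a low (GR32) or high (GRH32) 32-bit half.  The numbering
// used here is invented.
bool isHighReg(int physReg) { return physReg >= 16; }

// Expanding the NIFMux pseudo: the immediate stays the same, only the opcode
// depends on which half of the GR64 was allocated.
Opcode expandNIFMux(int physReg) {
  return isHighReg(physReg) ? Opcode::NIHF : Opcode::NILF;
}

int main() {
  assert(expandNIFMux(3) == Opcode::NILF);   // low half  -> nilf
  assert(expandNIFMux(19) == Opcode::NIHF);  // high half -> nihf
  std::puts("ok");
}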
@@ -761,22 +871,33 @@ let Defs = [CC] in {
// ORs of a 16-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 16-bit field, not the full register.
- let isCodeGenOnly = 1 in {
- def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
- def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
- }
- def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>;
- def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>;
- def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>;
- def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
+ //
+ // OIxMux expands to OI[LH]x, depending on the choice of register.
+ def OILMux : BinaryRIPseudo<or, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+ def OIHMux : BinaryRIPseudo<or, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+ def OILL : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
+ def OILH : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
+ def OIHL : BinaryRI<"oihl", 0xA59, or, GRH32, imm32ll16>;
+ def OIHH : BinaryRI<"oihh", 0xA58, or, GRH32, imm32lh16>;
+ def OILL64 : BinaryAliasRI<or, GR64, imm64ll16>;
+ def OILH64 : BinaryAliasRI<or, GR64, imm64lh16>;
+ def OIHL64 : BinaryAliasRI<or, GR64, imm64hl16>;
+ def OIHH64 : BinaryAliasRI<or, GR64, imm64hh16>;
// ORs of a 32-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
- let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
- def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
- def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
- def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to OILF or OIHF, depending on the choice of register.
+ def OIFMux : BinaryRIPseudo<or, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def OILF : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
+ def OIHF : BinaryRIL<"oihf", 0xC0C, or, GRH32, uimm32>;
+ }
+ def OILF64 : BinaryAliasRIL<or, GR64, imm64lf32>;
+ def OIHF64 : BinaryAliasRIL<or, GR64, imm64hf32>;
// ORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
@@ -786,6 +907,10 @@ let Defs = [CC] in {
// OR to memory
defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
+
+ // Block OR.
+ let mayLoad = 1, mayStore = 1 in
+ defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>;
}
defm : RMWIByte<or, bdaddr12pair, OI>;
defm : RMWIByte<or, bdaddr20pair, OIY>;
@@ -804,10 +929,15 @@ let Defs = [CC] in {
// XORs of a 32-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
- let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
- def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
- def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
- def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to XILF or XIHF, depending on the choice of register.
+ def XIFMux : BinaryRIPseudo<xor, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def XILF : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
+ def XIHF : BinaryRIL<"xihf", 0xC06, xor, GRH32, uimm32>;
+ }
+ def XILF64 : BinaryAliasRIL<xor, GR64, imm64lf32>;
+ def XIHF64 : BinaryAliasRIL<xor, GR64, imm64hf32>;
// XORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
@@ -817,6 +947,10 @@ let Defs = [CC] in {
// XOR to memory
defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
+
+ // Block XOR.
+ let mayLoad = 1, mayStore = 1 in
+ defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>;
}
defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;
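The new Block AND/OR/XOR entries (NC, OC and XC via MemorySS, with the z_*_loop nodes covering longer runs) target logical read-modify-write operations where both operands already live in memory and the length is a compile-time constant. A hedged illustration of the kind of source code this aims at, assuming the legality checks behind storeLoadCanUseBlockBinary pass; it is not taken from the patch:

#include <cstdint>

// Both sides are memory operands of known width, so a single
// storage-to-storage instruction can replace load + register op + store.
void clear_bits(uint32_t *dst, const uint32_t *mask) { *dst &= *mask; }  // NC
void merge_flags(uint32_t *dst, const uint32_t *src) { *dst |= *src; }   // OC
void toggle_bits(uint32_t *dst, const uint32_t *src) { *dst ^= *src; }   // XC

int main() {
  uint32_t a = 0xffff00ffu;
  const uint32_t b = 0x0f0f0f0fu;
  clear_bits(&a, &b);
  merge_flags(&a, &b);
  toggle_bits(&a, &b);
  return 0;
}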
@@ -842,9 +976,9 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>;
def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>;
// Multiplication of memory.
-defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16, 2>;
+defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>;
defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>;
-def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32, 4>;
+def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>;
def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
// Multiplication of a register, producing two results.
@@ -909,13 +1043,15 @@ let Defs = [CC] in {
// Forms of RISBG that only affect one word of the destination register.
// They do not set CC.
-let isCodeGenOnly = 1 in
- def RISBLG32 : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR32>,
- Requires<[FeatureHighWord]>;
-def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GR64, GR64>,
- Requires<[FeatureHighWord]>;
-def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>,
- Requires<[FeatureHighWord]>;
+def RISBMux : RotateSelectRIEfPseudo<GRX32, GRX32>, Requires<[FeatureHighWord]>;
+def RISBLL : RotateSelectAliasRIEf<GR32, GR32>, Requires<[FeatureHighWord]>;
+def RISBLH : RotateSelectAliasRIEf<GR32, GRH32>, Requires<[FeatureHighWord]>;
+def RISBHL : RotateSelectAliasRIEf<GRH32, GR32>, Requires<[FeatureHighWord]>;
+def RISBHH : RotateSelectAliasRIEf<GRH32, GRH32>, Requires<[FeatureHighWord]>;
+def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>,
+ Requires<[FeatureHighWord]>;
+def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>,
+ Requires<[FeatureHighWord]>;
// Rotate second operand left and perform a logical operation with selected
// bits of the first operand. The CC result only describes the selected bits,
@@ -930,39 +1066,50 @@ let Defs = [CC] in {
// Comparison
//===----------------------------------------------------------------------===//
-// Signed comparisons.
+// Signed comparisons. We put these before the unsigned comparisons because
+// some of the signed forms have COMPARE AND BRANCH equivalents whereas none
+// of the unsigned forms do.
let Defs = [CC], CCValues = 0xE in {
// Comparison with a register.
- def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>;
+ def CR : CompareRR <"c", 0x19, z_scmp, GR32, GR32>;
def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
- def CGR : CompareRRE<"cg", 0xB920, z_cmp, GR64, GR64>;
+ def CGR : CompareRRE<"cg", 0xB920, z_scmp, GR64, GR64>;
// Comparison with a signed 16-bit immediate.
- def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>;
- def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>;
-
- // Comparison with a signed 32-bit immediate.
- def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>;
- def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>;
+ def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>;
+ def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>;
+
+ // Comparison with a signed 32-bit immediate. CFIMux expands to CFI or CIH,
+ // depending on the choice of register.
+ def CFIMux : CompareRIPseudo<z_scmp, GRX32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def CFI : CompareRIL<"cfi", 0xC2D, z_scmp, GR32, simm32>;
+ def CIH : CompareRIL<"cih", 0xCCD, z_scmp, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>;
// Comparison with memory.
- defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16, 2>;
- defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load, 4>;
- def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16, 2>;
- def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32, 4>;
- def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load, 8>;
- def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>;
- def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>;
- def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>;
- def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>;
- def CGRL : CompareRILPC<"cgrl", 0xC68, z_cmp, GR64, aligned_load>;
+ defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>;
+ def CMux : CompareRXYPseudo<z_scmp, GRX32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>;
+ def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>;
+ def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, asextloadi32, 4>;
+ def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>;
+ def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>;
+ def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>;
+ def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>;
+ def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>;
+ def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>;
// Comparison between memory and a signed 16-bit immediate.
- def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>;
- def CHSI : CompareSIL<"chsi", 0xE55C, z_cmp, load, imm32sx16>;
- def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load, imm64sx16>;
+ def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>;
+ def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>;
+ def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>;
}
-defm : SXB<z_cmp, GR64, CGFR>;
+defm : SXB<z_scmp, GR64, CGFR>;
// Unsigned comparisons.
let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
@@ -971,35 +1118,79 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;
def CLGR : CompareRRE<"clg", 0xB921, z_ucmp, GR64, GR64>;
- // Comparison with a signed 32-bit immediate.
+ // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI
+ // or CLIH, depending on the choice of register.
+ def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>;
+ def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GR32, uimm32>,
+ Requires<[FeatureHighWord]>;
def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
// Comparison with memory.
+ def CLMux : CompareRXYPseudo<z_ucmp, GRX32, load, 4>,
+ Requires<[FeatureHighWord]>;
defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>;
- def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32, 4>;
+ def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>;
def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>;
def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32,
- aligned_zextloadi16>;
+ aligned_azextloadi16>;
def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32,
aligned_load>;
def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64,
- aligned_zextloadi16>;
+ aligned_azextloadi16>;
def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64,
- aligned_zextloadi32>;
+ aligned_azextloadi32>;
def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64,
aligned_load>;
// Comparison between memory and an unsigned 8-bit immediate.
- defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>;
+ defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>;
// Comparison between memory and an unsigned 16-bit immediate.
- def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>;
- def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
- def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
+ def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>;
+ def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
+ def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
}
defm : ZXB<z_ucmp, GR64, CLGFR>;
+// Memory-to-memory comparison.
+let mayLoad = 1, Defs = [CC] in
+ defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>;
+
+// String comparison.
+let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+ defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
+
+// Test under mask.
+let Defs = [CC] in {
+ // TMxMux expands to TM[LH]x, depending on the choice of register.
+ def TMLMux : CompareRIPseudo<z_tm_reg, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+ def TMHMux : CompareRIPseudo<z_tm_reg, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+ def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>;
+ def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>;
+ def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GRH32, imm32ll16>;
+ def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GRH32, imm32lh16>;
+
+ def TMLL64 : CompareAliasRI<z_tm_reg, GR64, imm64ll16>;
+ def TMLH64 : CompareAliasRI<z_tm_reg, GR64, imm64lh16>;
+ def TMHL64 : CompareAliasRI<z_tm_reg, GR64, imm64hl16>;
+ def TMHH64 : CompareAliasRI<z_tm_reg, GR64, imm64hh16>;
+
+ defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
+}
+
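TEST UNDER MASK gains both register forms (TM[LH][LH] plus the TM*Mux pseudos) and a memory form, distinguished in the patterns through z_tm_reg and z_tm_mem. As a reminder of what the instruction computes, here is a sketch of the 16-bit-field case using the condition-code convention I believe the architecture defines (0: selected bits all zero, 3: all one, 1/2: mixed, split on the leftmost selected bit); treat the exact CC encoding as an assumption, not a statement about the backend:

#include <cassert>
#include <cstdint>

// Illustration only: compute the CC a 16-bit TEST UNDER MASK would produce.
int testUnderMask(uint16_t field, uint16_t mask) {
  uint16_t selected = field & mask;
  if (mask == 0 || selected == 0) return 0;   // nothing selected, or all zero
  if (selected == mask) return 3;             // all selected bits are one
  // Mixed: CC1 if the leftmost selected bit is zero, CC2 if it is one.
  uint16_t leftmost = 0x8000;
  while (!(mask & leftmost)) leftmost >>= 1;
  return (field & leftmost) ? 2 : 1;
}

int main() {
  assert(testUnderMask(0x0000, 0x00ff) == 0);
  assert(testUnderMask(0x00ff, 0x00ff) == 3);
  assert(testUnderMask(0x0041, 0x00c3) == 1);  // leftmost tested bit is 0
  assert(testUnderMask(0x0081, 0x00c3) == 2);  // leftmost tested bit is 1
}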
+//===----------------------------------------------------------------------===//
+// Prefetch
+//===----------------------------------------------------------------------===//
+
+def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
+def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
+
//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//
@@ -1024,60 +1215,60 @@ def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>;
def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>;
-def ATOMIC_LOAD_NILL32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
-def ATOMIC_LOAD_NILH32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
-def ATOMIC_LOAD_NILF32 : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
+def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
+def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
+def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>;
-def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
-def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
-def ATOMIC_LOAD_NIHL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
-def ATOMIC_LOAD_NIHH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
-def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
-def ATOMIC_LOAD_NIHF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
+def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
+def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
+def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
+def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
+def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
+def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>;
def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>;
def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>;
-def ATOMIC_LOAD_OILL32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
-def ATOMIC_LOAD_OILH32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
-def ATOMIC_LOAD_OILF32 : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
+def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
+def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
+def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>;
-def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
-def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
-def ATOMIC_LOAD_OIHL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
-def ATOMIC_LOAD_OIHH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
-def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
-def ATOMIC_LOAD_OIHF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
+def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
+def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
+def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
+def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
+def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
+def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>;
def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>;
def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>;
-def ATOMIC_LOAD_XILF32 : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
+def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>;
-def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
-def ATOMIC_LOAD_XIHF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
+def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
+def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>;
def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand,
imm32lh16c>;
def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>;
-def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm32<atomic_load_nand_32,
imm32ll16c>;
-def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm32<atomic_load_nand_32,
imm32lh16c>;
-def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
+def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>;
-def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64ll16c>;
-def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64lh16c>;
-def ATOMIC_LOAD_NIHLi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hl16c>;
-def ATOMIC_LOAD_NIHHi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hh16c>;
-def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64lf32c>;
-def ATOMIC_LOAD_NIHFi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hf32c>;
def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>;
@@ -1119,6 +1310,10 @@ let Defs = [CC] in {
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
+// Extract CC into bits 29 and 28 of a register.
+let Uses = [CC] in
+ def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>;
+
// Read a 32-bit access register into a GR32. As with all GR32 operations,
// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
// when a 64-bit address is stored in a pair of access registers.
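The IPM definition added above says the condition code lands in bits 29 and 28 of the result. Numerically that is just cc << 28 in the 32-bit value (the program-mask bits sit below it and are ignored here); a tiny sketch:

#include <cassert>
#include <cstdint>

uint32_t ipmValue(unsigned cc) { return uint32_t(cc & 3) << 28; }  // bits 29-28
unsigned ccFromIPM(uint32_t v) { return (v >> 28) & 3; }

int main() {
  for (unsigned cc = 0; cc < 4; ++cc)
    assert(ccFromIPM(ipmValue(cc)) == cc);
}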
@@ -1134,19 +1329,11 @@ let Defs = [CC] in {
def FLOGR : UnaryRRE<"flog", 0xB983, null_frag, GR128, GR64>;
}
def : Pat<(ctlz GR64:$src),
- (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>;
+ (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
-
-// There are no 32-bit equivalents of LLILL and LLILH, so use a full
-// 64-bit move followed by a subreg. This preserves the invariant that
-// all GR32 operations only modify the low 32 bits.
-def : Pat<(i32 imm32ll16:$src),
- (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>;
-def : Pat<(i32 imm32lh16:$src),
- (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>;
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
// Extend GR32s and GR64s to GR128s.
let usesCustomInserter = 1 in {
@@ -1155,6 +1342,10 @@ let usesCustomInserter = 1 in {
def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
}
+// Search a block of memory for a character.
+let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+ defm SRST : StringRRE<"srst", 0xB25E, z_search_string>;
+
//===----------------------------------------------------------------------===//
// Peepholes.
//===----------------------------------------------------------------------===//
@@ -1163,14 +1354,14 @@ let usesCustomInserter = 1 in {
defm : ZXB<add, GR64, ALGFR>;
def : Pat<(add GR64:$src1, imm64zx32:$src2),
(ALGFI GR64:$src1, imm64zx32:$src2)>;
-def : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
(ALGF GR64:$src1, bdxaddr20only:$addr)>;
// Use SL* for GR64 subtractions of unsigned 32-bit values.
defm : ZXB<sub, GR64, SLGFR>;
def : Pat<(add GR64:$src1, imm64zx32n:$src2),
(SLGFI GR64:$src1, imm64zx32n:$src2)>;
-def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
(SLGF GR64:$src1, bdxaddr20only:$addr)>;
// Optimize sign-extended 1/0 selects to -1/0 selects. This is important
@@ -1184,3 +1375,19 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid,
(i32 63)),
(i32 63)),
(Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>;
+
+// Peepholes for turning scalar operations into block operations.
+defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 1>;
+defm : BlockLoadStore<anyextloadi16, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 2>;
+defm : BlockLoadStore<load, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 4>;
+defm : BlockLoadStore<anyextloadi8, i64, MVCSequence, NCSequence,
+ OCSequence, XCSequence, 1>;
+defm : BlockLoadStore<anyextloadi16, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 2>;
+defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 4>;
+defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 8>;
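The BlockLoadStore instantiations above wire each load width (1, 2, 4 or 8 bytes, with an i32 or i64 intermediate type) to MVC for plain copies and to NC/OC/XC for the and/or/xor read-modify-write forms. For the plain-copy half, the shape being matched is a load whose only use is a same-sized store; a rough source-level picture, assuming the storeLoadCanUseMVC-style predicate is satisfied:

#include <cstdint>

// The loaded value is only stored again, so a single MVC of length 4
// can replace the load/store pair.
void copy_word(uint32_t *dst, const uint32_t *src) { *dst = *src; }

int main() {
  uint32_t in = 42, out = 0;
  copy_word(&out, &in);
  return out == 42 ? 0 : 1;
}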
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 114f74e..ba027d4 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -225,11 +225,13 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
Terminator.ExtraRelaxSize = 6;
break;
case SystemZ::CRJ:
- // Relaxes to a CR/BRCL sequence, which is 2 bytes longer.
+ case SystemZ::CLRJ:
+ // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer.
Terminator.ExtraRelaxSize = 2;
break;
case SystemZ::CGRJ:
- // Relaxes to a CGR/BRCL sequence, which is 4 bytes longer.
+ case SystemZ::CLGRJ:
+ // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer.
Terminator.ExtraRelaxSize = 4;
break;
case SystemZ::CIJ:
@@ -237,6 +239,11 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
// Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer.
Terminator.ExtraRelaxSize = 4;
break;
+ case SystemZ::CLIJ:
+ case SystemZ::CLGIJ:
+ // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer.
+ Terminator.ExtraRelaxSize = 6;
+ break;
default:
llvm_unreachable("Unrecognized branch instruction");
}
@@ -401,6 +408,18 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
case SystemZ::CGIJ:
splitCompareBranch(Branch, SystemZ::CGHI);
break;
+ case SystemZ::CLRJ:
+ splitCompareBranch(Branch, SystemZ::CLR);
+ break;
+ case SystemZ::CLGRJ:
+ splitCompareBranch(Branch, SystemZ::CLGR);
+ break;
+ case SystemZ::CLIJ:
+ splitCompareBranch(Branch, SystemZ::CLFI);
+ break;
+ case SystemZ::CLGIJ:
+ splitCompareBranch(Branch, SystemZ::CLGFI);
+ break;
default:
llvm_unreachable("Unrecognized branch");
}
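The ExtraRelaxSize values used here are easy to sanity-check: a fused compare-and-branch is 6 bytes, and relaxing it produces a standalone compare followed by a 6-byte BRCL, so the growth equals the size of the standalone compare (RR = 2, RRE = 4, RI = 4, RIL = 6 bytes, as I read the z/Architecture encodings). A small check of that arithmetic:

#include <cassert>

constexpr int kFusedCompareBranchSize = 6;  // C(L)(G)RJ / C(L)(G)IJ
constexpr int kBRCLSize = 6;

// Growth when a fused compare-and-branch is split into compare + BRCL.
constexpr int extraRelaxSize(int compareSize) {
  return compareSize + kBRCLSize - kFusedCompareBranchSize;
}

int main() {
  assert(extraRelaxSize(2) == 2);  // CRJ/CLRJ    -> CR/CLR     + BRCL
  assert(extraRelaxSize(4) == 4);  // CGRJ/CLGRJ  -> CGR/CLGR   + BRCL
  assert(extraRelaxSize(4) == 4);  // CIJ/CGIJ    -> CHI/CGHI   + BRCL
  assert(extraRelaxSize(6) == 6);  // CLIJ/CLGIJ  -> CLFI/CLGFI + BRCL
}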
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
index 432a0d3..ff9a6c0 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -15,15 +15,6 @@
using namespace llvm;
-// If Opcode is an interprocedural reference that can be shortened,
-// return the short form, otherwise return 0.
-static unsigned getShortenedInstr(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::BRASL: return SystemZ::BRAS;
- }
- return Opcode;
-}
-
// Return the VK_* enumeration for MachineOperand target flags Flags.
static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) {
@@ -35,70 +26,71 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
llvm_unreachable("Unrecognised MO_ACCESS_MODEL");
}
-SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx,
+SystemZMCInstLower::SystemZMCInstLower(MCContext &ctx,
SystemZAsmPrinter &asmprinter)
- : Mang(mang), Ctx(ctx), AsmPrinter(asmprinter) {}
+ : Ctx(ctx), AsmPrinter(asmprinter) {}
-MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Symbol,
- int64_t Offset) const {
- MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
- const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
- if (Offset) {
- const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
- Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+const MCExpr *
+SystemZMCInstLower::getExpr(const MachineOperand &MO,
+ MCSymbolRefExpr::VariantKind Kind) const {
+ const MCSymbol *Symbol;
+ bool HasOffset = true;
+ switch (MO.getType()) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ HasOffset = false;
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = AsmPrinter.getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ HasOffset = false;
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ default:
+ llvm_unreachable("unknown operand type");
}
- return MCOperand::CreateExpr(Expr);
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
+ if (HasOffset)
+ if (int64_t Offset = MO.getOffset()) {
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
+ Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ }
+ return Expr;
}
MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
-
case MachineOperand::MO_Register:
return MCOperand::CreateReg(MO.getReg());
case MachineOperand::MO_Immediate:
return MCOperand::CreateImm(MO.getImm());
- case MachineOperand::MO_MachineBasicBlock:
- return lowerSymbolOperand(MO, MO.getMBB()->getSymbol(),
- /* MO has no offset field */0);
-
- case MachineOperand::MO_GlobalAddress:
- return lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()),
- MO.getOffset());
-
- case MachineOperand::MO_ExternalSymbol: {
- StringRef Name = MO.getSymbolName();
- return lowerSymbolOperand(MO, AsmPrinter.GetExternalSymbolSymbol(Name),
- MO.getOffset());
- }
-
- case MachineOperand::MO_JumpTableIndex:
- return lowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()),
- /* MO has no offset field */0);
-
- case MachineOperand::MO_ConstantPoolIndex:
- return lowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()),
- MO.getOffset());
-
- case MachineOperand::MO_BlockAddress: {
- const BlockAddress *BA = MO.getBlockAddress();
- return lowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(BA),
- MO.getOffset());
+ default: {
+ MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
+ return MCOperand::CreateExpr(getExpr(MO, Kind));
}
}
}
void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- unsigned Opcode = MI->getOpcode();
- // When emitting binary code, start with the shortest form of an instruction
- // and then relax it where necessary.
- if (!AsmPrinter.OutStreamer.hasRawTextSupport())
- Opcode = getShortenedInstr(Opcode);
- OutMI.setOpcode(Opcode);
+ OutMI.setOpcode(MI->getOpcode());
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI->getOperand(I);
// Ignore all implicit register operands.
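The rewritten lowering funnels every symbolic operand through getExpr, which picks the symbol for the operand kind and, for kinds that carry an offset, wraps the reference in an add with that constant. A standalone model of that composition, using plain strings instead of MCExpr nodes; the names here are illustrative only:

#include <cassert>
#include <cstdint>
#include <string>

// Model: a symbol reference, optionally followed by "+offset".  Basic-block
// and jump-table operands have no offset field, so hasOffset is false there.
std::string lowerSymbolicOperand(const std::string &symbol,
                                 bool hasOffset, int64_t offset) {
  std::string expr = symbol;
  if (hasOffset && offset != 0)
    expr += "+" + std::to_string(offset);
  return expr;
}

int main() {
  assert(lowerSymbolicOperand("foo", true, 8) == "foo+8");
  assert(lowerSymbolicOperand("foo", true, 0) == "foo");
  assert(lowerSymbolicOperand(".LBB0_1", false, 0) == ".LBB0_1");
}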
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h
index db5bdb0..f6d5ac8 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.h
+++ b/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -10,27 +10,24 @@
#ifndef LLVM_SYSTEMZMCINSTLOWER_H
#define LLVM_SYSTEMZMCINSTLOWER_H
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
-class MCContext;
class MCInst;
class MCOperand;
-class MCSymbol;
class MachineInstr;
class MachineOperand;
class Mangler;
class SystemZAsmPrinter;
class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower {
- Mangler *Mang;
MCContext &Ctx;
SystemZAsmPrinter &AsmPrinter;
public:
- SystemZMCInstLower(Mangler *mang, MCContext &ctx,
- SystemZAsmPrinter &asmPrinter);
+ SystemZMCInstLower(MCContext &ctx, SystemZAsmPrinter &asmPrinter);
// Lower MachineInstr MI to MCInst OutMI.
void lower(const MachineInstr *MI, MCInst &OutMI) const;
@@ -38,9 +35,9 @@ public:
// Return an MCOperand for MO.
MCOperand lowerOperand(const MachineOperand& MO) const;
- // Return an MCOperand for MO, given that it equals Symbol + Offset.
- MCOperand lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Symbol, int64_t Offset) const;
+ // Return an MCExpr for symbolic operand MO with variant kind Kind.
+ const MCExpr *getExpr(const MachineOperand &MO,
+ MCSymbolRefExpr::VariantKind Kind) const;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
new file mode 100644
index 0000000..00572d0
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -0,0 +1,17 @@
+//== SystemZMachineFunctionInfo.cpp - SystemZ machine function info -*- C++ -*-=//
+//== SystemZMachineFunctionInfo.cpp - SystemZ machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+
+using namespace llvm;
+
+
+// pin vtable to this file
+void SystemZMachineFunctionInfo::anchor() {}
+
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 69c2691..845291f 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -15,6 +15,7 @@
namespace llvm {
class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
unsigned LowSavedGPR;
unsigned HighSavedGPR;
unsigned VarArgsFirstGPR;
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 9d79439..3ad146c 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -46,7 +46,8 @@ class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
// address with address size VT. SELF is the name of the operand and
// ASMOP is the associated asm operand.
class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop>
- : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>,
+ : ComplexPattern<vt, 1, "selectPCRelAddress",
+ [z_pcrel_wrapper, z_pcrel_offset]>,
PCRelOperand<vt, asmop> {
let MIOperandInfo = (ops !cast<Operand>(self));
}
@@ -219,11 +220,6 @@ def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">;
// i32 immediates
//===----------------------------------------------------------------------===//
-// Immediates for 8-bit lengths.
-def imm32len8 : Immediate<i32, [{
- return isUInt<8>(N->getZExtValue() - 1);
-}], NOOP_SDNodeXForm, "U32Imm">;
-
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being zero.
def imm32ll16 : Immediate<i32, [{
@@ -338,6 +334,10 @@ def imm64sx8 : Immediate<i64, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
+def imm64zx8 : Immediate<i64, [{
+ return isUInt<8>(N->getSExtValue());
+}], UIMM8, "U8Imm">;
+
def imm64sx16 : Immediate<i64, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
@@ -358,7 +358,7 @@ def imm64zx32n : Immediate<i64, [{
return isUInt<32>(-N->getSExtValue());
}], NEGIMM32, "U32Imm">;
-def imm64 : ImmLeaf<i64, [{}]>;
+def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
//===----------------------------------------------------------------------===//
// Floating-point immediates
@@ -396,19 +396,6 @@ def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
let DecoderMethod = "decodePC32DBLOperand";
}
-// A PC-relative offset of a global value when the value is used as a
-// call target. The offset is sign-extended and multiplied by 2.
-def pcrel16call : PCRelAddress<i64, "pcrel16call", PCRel16> {
- let PrintMethod = "printCallOperand";
- let EncoderMethod = "getPLT16DBLEncoding";
- let DecoderMethod = "decodePC16DBLOperand";
-}
-def pcrel32call : PCRelAddress<i64, "pcrel32call", PCRel32> {
- let PrintMethod = "printCallOperand";
- let EncoderMethod = "getPLT32DBLEncoding";
- let DecoderMethod = "decodePC32DBLOperand";
-}
-
//===----------------------------------------------------------------------===//
// Addressing modes
//===----------------------------------------------------------------------===//
@@ -435,6 +422,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
// <type> is one of:
// shift : base + displacement (32-bit)
// bdaddr : base + displacement
+// mviaddr : like bdaddr, but reject cases with a natural index
// bdxaddr : base + displacement + index
// laaddr : like bdxaddr, but used for Load Address operations
// dynalloc : base + displacement + index + ADJDYNALLOC
@@ -460,6 +448,8 @@ def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">;
def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">;
def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">;
def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">;
+def mviaddr12pair : BDMode <"MVIAddr", "64", "12", "Pair">;
+def mviaddr20pair : BDMode <"MVIAddr", "64", "20", "Pair">;
def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">;
def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">;
def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">;
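The new mviaddr12pair/mviaddr20pair modes exist because MVI and MVIY only encode a base register plus a displacement, so an address that naturally wants an index register has to be kept away from them. A hedged source-level illustration; the fallback instruction choice is my assumption, not something the patch states:

#include <cstdint>

void store_flag(uint8_t *p) {
  *p = 1;          // base + displacement only: MVI/MVIY can encode this
}
void store_flag_at(uint8_t *p, long i) {
  p[i] = 1;        // wants base + index + displacement, so the value has to
                   // go through a register store instead (STC, for example)
}

int main() {
  uint8_t buf[4] = {0, 0, 0, 0};
  store_flag(buf);
  store_flag_at(buf, 2);
  return buf[0] + buf[2] == 2 ? 0 : 1;
}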
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 6a3af2b..31cabaa 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -15,6 +15,9 @@ def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>,
SDTCisVT<1, i64>]>;
def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ZICmp : SDTypeProfile<0, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
def SDT_ZBRCCMask : SDTypeProfile<0, 3,
[SDTCisVT<0, i8>,
SDTCisVT<1, i8>,
@@ -27,6 +30,10 @@ def SDT_ZSelectCCMask : SDTypeProfile<1, 4,
def SDT_ZWrapPtr : SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
+def SDT_ZWrapOffset : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def SDT_ZExtractAccess : SDTypeProfile<1, 1,
[SDTCisVT<0, i32>,
@@ -54,10 +61,24 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6,
SDTCisVT<4, i32>,
SDTCisVT<5, i32>,
SDTCisVT<6, i32>]>;
-def SDT_ZCopy : SDTypeProfile<0, 3,
+def SDT_ZMemMemLength : SDTypeProfile<0, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
- SDTCisVT<2, i32>]>;
+ SDTCisVT<2, i64>]>;
+def SDT_ZMemMemLoop : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i64>,
+ SDTCisVT<3, i64>]>;
+def SDT_ZString : SDTypeProfile<1, 3,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_ZPrefetch : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisPtrTy<1>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -76,9 +97,15 @@ def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
-def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>;
-def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
+ SDT_ZWrapOffset, []>;
+def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>;
+def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>;
def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
[SDNPHasChain, SDNPInGlue]>;
def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
@@ -109,13 +136,56 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
-def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZCopy,
+def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
+ [SDNPInGlue]>;
+def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
+// Signed and unsigned comparisons.
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return Type != SystemZICMP::UnsignedOnly;
+}]>;
+def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return Type != SystemZICMP::SignedOnly;
+}]>;
+
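The ICMP node now carries a third operand describing which comparison instructions are acceptable, and z_scmp/z_ucmp are just filters over it: a comparison may be matched by a signed instruction unless it is UnsignedOnly, and by an unsigned instruction unless it is SignedOnly. A small model of that selection; the enum mirrors the SystemZICMP values the predicates test, with the name of the unrestricted value (Any) being an assumption:

#include <cassert>

enum class ICmpType { Any, UnsignedOnly, SignedOnly };

bool matchesSignedCompare(ICmpType t)   { return t != ICmpType::UnsignedOnly; }
bool matchesUnsignedCompare(ICmpType t) { return t != ICmpType::SignedOnly; }

int main() {
  assert(matchesSignedCompare(ICmpType::Any));     // either form works
  assert(matchesUnsignedCompare(ICmpType::Any));
  assert(!matchesSignedCompare(ICmpType::UnsignedOnly));
  assert(!matchesUnsignedCompare(ICmpType::SignedOnly));
}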
+// Register- and memory-based TEST UNDER MASK.
+def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
+def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
+
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
@@ -130,6 +200,36 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
+// Extending loads in which the extension type can be signed.
+def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+ return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD;
+}]>;
+def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Extending loads in which the extension type can be unsigned.
+def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+ return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD;
+}]>;
+def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
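asextload/azextload capture the fact that an any-extending load can always be served by a sign- or zero-extending instruction, so both ISD::EXTLOAD and the matching explicit extension are acceptable. The predicate logic, restated as a standalone check with an enum standing in for ISD's load extension types:

#include <cassert>

enum class ExtType { NonExt, AnyExt, SignExt, ZeroExt };  // models ISD::*EXTLOAD

// A sign-extending instruction can serve any-extending loads too...
bool okForSignExtendingInsn(ExtType t) {
  return t == ExtType::AnyExt || t == ExtType::SignExt;
}
// ...and likewise for a zero-extending instruction.
bool okForZeroExtendingInsn(ExtType t) {
  return t == ExtType::AnyExt || t == ExtType::ZeroExt;
}

int main() {
  assert(okForSignExtendingInsn(ExtType::AnyExt));
  assert(okForZeroExtendingInsn(ExtType::AnyExt));
  assert(!okForSignExtendingInsn(ExtType::ZeroExt));
  assert(!okForZeroExtendingInsn(ExtType::SignExt));
}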
// Extending loads in which the extension type doesn't matter.
def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
@@ -150,11 +250,11 @@ class AlignedLoad<SDPatternOperator load>
LoadSDNode *Load = cast<LoadSDNode>(N);
return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
}]>;
-def aligned_load : AlignedLoad<load>;
-def aligned_sextloadi16 : AlignedLoad<sextloadi16>;
-def aligned_sextloadi32 : AlignedLoad<sextloadi32>;
-def aligned_zextloadi16 : AlignedLoad<zextloadi16>;
-def aligned_zextloadi32 : AlignedLoad<zextloadi32>;
+def aligned_load : AlignedLoad<load>;
+def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
+def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
+def aligned_azextloadi16 : AlignedLoad<azextloadi16>;
+def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
// Aligned stores.
class AlignedStore<SDPatternOperator store>
@@ -189,6 +289,31 @@ def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
+// A store of a load that can be implemented using MVC.
+def mvc_store : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore node:$value, node:$addr),
+ [{ return storeLoadCanUseMVC(N); }]>;
+
+// Binary read-modify-write operations on memory in which the other
+// operand is also memory and for which block operations like NC can
+// be used. There are two patterns for each operator, depending on
+// which operand contains the "other" load.
+multiclass block_op<SDPatternOperator operator> {
+ def "1" : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore (operator node:$value,
+ (unindexedload node:$addr)),
+ node:$addr),
+ [{ return storeLoadCanUseBlockBinary(N, 0); }]>;
+ def "2" : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore (operator (unindexedload node:$addr),
+ node:$value),
+ node:$addr),
+ [{ return storeLoadCanUseBlockBinary(N, 1); }]>;
+}
+defm block_and : block_op<and>;
+defm block_or : block_op<or>;
+defm block_xor : block_op<xor>;
+
// Insertions.
def inserti8 : PatFrag<(ops node:$src1, node:$src2),
(or (and node:$src1, -256), node:$src2)>;
@@ -221,6 +346,16 @@ def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2),
APInt::getLowBitsSet(BitWidth, 8));
}]>;
+// Integer absolute, matching the canonical form generated by DAGCombiner.
+def z_iabs32 : PatFrag<(ops node:$src),
+ (xor (add node:$src, (sra node:$src, (i32 31))),
+ (sra node:$src, (i32 31)))>;
+def z_iabs64 : PatFrag<(ops node:$src),
+ (xor (add node:$src, (sra node:$src, (i32 63))),
+ (sra node:$src, (i32 63)))>;
+def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
+def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
+
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
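z_iabs32/z_iabs64 match the form DAGCombiner canonicalizes integer absolute value into: broadcast the sign across the word, add it, then xor it back out. A quick check of the identity for the 32-bit case; the unsigned casts only keep the sketch free of signed-overflow undefined behaviour, and as with the hardware pattern INT_MIN maps to itself:

#include <cassert>
#include <cstdint>

int32_t iabs32(int32_t x) {
  int32_t sign = x >> 31;                      // 0 for x >= 0, -1 for x < 0
  uint32_t t = uint32_t(x) + uint32_t(sign);   // x + sign, wrapping
  return int32_t(t ^ uint32_t(sign));          // (x + sign) ^ sign
}

int main() {
  assert(iabs32(5) == 5);
  assert(iabs32(-5) == 5);
  assert(iabs32(0) == 0);
}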
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index c442ae0..7706351 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -13,7 +13,7 @@ multiclass SXU<SDPatternOperator operator, Instruction insn> {
def : Pat<(operator (sext (i32 GR32:$src))),
(insn GR32:$src)>;
def : Pat<(operator (sext_inreg GR64:$src, i32)),
- (insn (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+ (insn (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
}
// Record that INSN performs a 64-bit version of binary operator OPERATOR
@@ -24,7 +24,7 @@ multiclass SXB<SDPatternOperator operator, RegisterOperand cls,
def : Pat<(operator cls:$src1, (sext GR32:$src2)),
(insn cls:$src1, GR32:$src2)>;
def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)),
- (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>;
}
// Like SXB, but for zero extension.
@@ -33,7 +33,7 @@ multiclass ZXB<SDPatternOperator operator, RegisterOperand cls,
def : Pat<(operator cls:$src1, (zext GR32:$src2)),
(insn cls:$src1, GR32:$src2)>;
def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)),
- (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>;
}
// Record that INSN performs a binary read-modify-write operation,
@@ -66,22 +66,87 @@ multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
(insn cls:$src1, mode:$src2)>;
}
-// Use MVC instruction INSN for a load of type LOAD followed by a store
-// of type STORE. VT is the type of the intermediate register and LENGTH
-// is the number of bytes to copy (which may be smaller than VT).
-multiclass MVCLoadStore<SDPatternOperator load, SDPatternOperator store,
- ValueType vt, Instruction insn, bits<5> length> {
- def Pat : PatFrag<(ops node:$dest, node:$src),
- (store (vt (load node:$src)), node:$dest),
- [{ return storeLoadCanUseMVC(N); }]>;
+// INSN stores the low 32 bits of a GPR to a memory with addressing mode MODE.
+// Record that it is equivalent to using OPERATOR to store a GR64.
+class StoreGR64<Instruction insn, SDPatternOperator operator,
+ AddressingMode mode>
+ : Pat<(operator GR64:$R1, mode:$XBD2),
+ (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), mode:$XBD2)>;
- def : Pat<(!cast<SDPatternOperator>(NAME##"Pat") bdaddr12only:$dest,
- bdaddr12only:$src),
+// INSN and INSNY are an RX/RXY pair of instructions that store the low
+// 32 bits of a GPR to memory. Record that they are equivalent to using
+// OPERATOR to store a GR64.
+multiclass StoreGR64Pair<Instruction insn, Instruction insny,
+ SDPatternOperator operator> {
+ def : StoreGR64<insn, operator, bdxaddr12pair>;
+ def : StoreGR64<insny, operator, bdxaddr20pair>;
+}
+
+// INSN stores the low 32 bits of a GPR using PC-relative addressing.
+// Record that it is equivalent to using OPERATOR to store a GR64.
+class StoreGR64PC<Instruction insn, SDPatternOperator operator>
+ : Pat<(operator GR64:$R1, pcrel32:$XBD2),
+ (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), pcrel32:$XBD2)> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+// INSN and INSNINV conditionally store the low 32 bits of a GPR to memory,
+// with INSN storing when the condition is true and INSNINV storing when the
+// condition is false. Record that they are equivalent to a LOAD/select/STORE
+// sequence for GR64s.
+multiclass CondStores64<Instruction insn, Instruction insninv,
+ SDPatternOperator store, SDPatternOperator load,
+ AddressingMode mode> {
+ def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
+ uimm8zx4:$valid, uimm8zx4:$cc),
+ mode:$addr),
+ (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
+ uimm8zx4:$valid, uimm8zx4:$cc)>;
+ def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
+ uimm8zx4:$valid, uimm8zx4:$cc),
+ mode:$addr),
+ (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
+ uimm8zx4:$valid, uimm8zx4:$cc)>;
+}
+
+// Try to use MVC instruction INSN for a load of type LOAD followed by a store
+// of the same size. VT is the type of the intermediate (legalized) value and
+// LENGTH is the number of bytes loaded by LOAD.
+multiclass MVCLoadStore<SDPatternOperator load, ValueType vt, Instruction insn,
+ bits<5> length> {
+ def : Pat<(mvc_store (vt (load bdaddr12only:$src)), bdaddr12only:$dest),
(insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
}
+// Use NC-like instruction INSN for block_op operation OPERATOR.
+// The other operand is a load of type LOAD, which accesses LENGTH bytes.
+// VT is the intermediate legalized type in which the binary operation
+// is actually done.
+multiclass BinaryLoadStore<SDPatternOperator operator, SDPatternOperator load,
+ ValueType vt, Instruction insn, bits<5> length> {
+ def : Pat<(operator (vt (load bdaddr12only:$src)), bdaddr12only:$dest),
+ (insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
+}
+
+// A convenient way of generating all block peepholes for a particular
+// LOAD/VT/LENGTH combination.
+multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
+ Instruction mvc, Instruction nc, Instruction oc,
+ Instruction xc, bits<5> length> {
+ defm : MVCLoadStore<load, vt, mvc, length>;
+ defm : BinaryLoadStore<block_and1, load, vt, nc, length>;
+ defm : BinaryLoadStore<block_and2, load, vt, nc, length>;
+ defm : BinaryLoadStore<block_or1, load, vt, oc, length>;
+ defm : BinaryLoadStore<block_or2, load, vt, oc, length>;
+ defm : BinaryLoadStore<block_xor1, load, vt, xc, length>;
+ defm : BinaryLoadStore<block_xor2, load, vt, xc, length>;
+}
+
// Record that INSN is a LOAD AND TEST that can be used to compare
// registers in CLS against zero. The instruction has separate R1 and R2
// operands, but they must be the same when the instruction is used like this.
class CompareZeroFP<Instruction insn, RegisterOperand cls>
- : Pat<(z_cmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
+ : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 7e14aa7..f241fb0 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -31,8 +31,16 @@ def FeatureHighWord : SystemZFeature<
"Assume that the high-word facility is installed"
>;
-def : Processor<"z10", NoItineraries, []>;
-def : Processor<"z196", NoItineraries,
- [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord]>;
+def FeatureFPExtension : SystemZFeature<
+ "fp-extension", "FPExtension",
+ "Assume that the floating-point extension facility is installed"
+>;
+
+def : Processor<"generic", NoItineraries, []>;
+def : Processor<"z10", NoItineraries, []>;
+def : Processor<"z196", NoItineraries,
+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+ FeatureFPExtension]>;
def : Processor<"zEC12", NoItineraries,
- [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord]>;
+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+ FeatureFPExtension]>;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 8ce6d6a..b61ae88 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -42,13 +42,15 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasFP(MF)) {
// R11D is the frame pointer. Reserve all aliases.
Reserved.set(SystemZ::R11D);
- Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R11L);
+ Reserved.set(SystemZ::R11H);
Reserved.set(SystemZ::R10Q);
}
// R15D is the stack pointer. Reserve all aliases.
Reserved.set(SystemZ::R15D);
- Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R15L);
+ Reserved.set(SystemZ::R15H);
Reserved.set(SystemZ::R14Q);
return Reserved;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index c447e4d..13f45fa 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -22,10 +22,10 @@ namespace SystemZ {
// Return the subreg to use for referring to the even and odd registers
// in a GR128 pair. Is32Bit says whether we want a GR32 or GR64.
inline unsigned even128(bool Is32bit) {
- return Is32bit ? subreg_32bit : subreg_high;
+ return Is32bit ? subreg_hl32 : subreg_h64;
}
inline unsigned odd128(bool Is32bit) {
- return Is32bit ? subreg_low32 : subreg_low;
+ return Is32bit ? subreg_l32 : subreg_l64;
}
}
@@ -48,6 +48,10 @@ public:
LLVM_OVERRIDE {
return true;
}
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const
+ LLVM_OVERRIDE {
+ return true;
+ }
virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0)
const LLVM_OVERRIDE;
virtual BitVector getReservedRegs(const MachineFunction &MF)
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index ffffe72..93d7c83 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -21,11 +21,12 @@ class SystemZRegWithSubregs<string n, list<Register> subregs>
}
let Namespace = "SystemZ" in {
-def subreg_32bit : SubRegIndex<32>; // could also be named "subreg_high32"
-// Indices are used in a variety of ways, so don't set an Offset.
-def subreg_high : SubRegIndex<64, -1>;
-def subreg_low : SubRegIndex<64, -1>;
-def subreg_low32 : ComposedSubRegIndex<subreg_low, subreg_32bit>;
+def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
+def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
+def subreg_l64 : SubRegIndex<64, 0>;
+def subreg_h64 : SubRegIndex<64, 64>;
+def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
+def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
}
// Define a register class that contains values of type TYPE and an
@@ -55,36 +56,49 @@ class GPR32<bits<16> num, string n> : SystemZReg<n> {
}
// One of the 16 64-bit general-purpose registers.
-class GPR64<bits<16> num, string n, GPR32 low>
- : SystemZRegWithSubregs<n, [low]> {
+class GPR64<bits<16> num, string n, GPR32 low, GPR32 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_32bit];
+ let SubRegIndices = [subreg_l32, subreg_h32];
}
// 8 even-odd pairs of GPR64s.
-class GPR128<bits<16> num, string n, GPR64 high, GPR64 low>
- : SystemZRegWithSubregs<n, [high, low]> {
+class GPR128<bits<16> num, string n, GPR64 low, GPR64 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_high, subreg_low];
+ let SubRegIndices = [subreg_l64, subreg_h64];
}
// General-purpose registers
foreach I = 0-15 in {
- def R#I#W : GPR32<I, "r"#I>;
- def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"W")>, DwarfRegNum<[I]>;
+ def R#I#L : GPR32<I, "r"#I>;
+ def R#I#H : GPR32<I, "r"#I>;
+ def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"L"), !cast<GPR32>("R"#I#"H")>,
+ DwarfRegNum<[I]>;
}
foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in {
- def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#I#"D"),
- !cast<GPR64>("R"#!add(I, 1)#"D")>;
+ def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#!add(I, 1)#"D"),
+ !cast<GPR64>("R"#I#"D")>;
}
/// Allocate the callee-saved R6-R13 backwards. That way they can be saved
/// together with R14 and R15 in one prolog instruction.
-defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uW", 0, 5),
- (sequence "R%uW", 15, 6))>;
-defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
- (sequence "R%uD", 15, 6))>;
+defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5),
+ (sequence "R%uL", 15, 6))>;
+defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5),
+ (sequence "R%uH", 15, 6))>;
+defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))>;
+
+// Combine the low and high GR32s into a single class. This can only be
+// used for virtual registers if the high-word facility is available.
+defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
+ (add (sequence "R%uL", 0, 5),
+ (sequence "R%uH", 0, 5),
+ R15L, R15H, R14L, R14H, R13L, R13H,
+ R12L, R12H, R11L, R11H, R10L, R10H,
+ R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H)>;
// The architecture doesn't really have any i128 support, so model the
// register pairs as untyped instead.
@@ -94,7 +108,7 @@ defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,
// Base and index registers. Everything except R0, which in an address
// context evaluates as 0.
-defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0W)>;
+defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>;
defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>;
// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
@@ -114,14 +128,14 @@ class FPR32<bits<16> num, string n> : SystemZReg<n> {
class FPR64<bits<16> num, string n, FPR32 low>
: SystemZRegWithSubregs<n, [low]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_32bit];
+ let SubRegIndices = [subreg_h32];
}
// 8 pairs of FPR64s, with a one-register gap in between.
-class FPR128<bits<16> num, string n, FPR64 high, FPR64 low>
- : SystemZRegWithSubregs<n, [high, low]> {
+class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_high, subreg_low];
+ let SubRegIndices = [subreg_l64, subreg_h64];
}
// Floating-point registers
@@ -132,8 +146,8 @@ foreach I = 0-15 in {
}
foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
- def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#I#"D"),
- !cast<FPR64>("F"#!add(I, 2)#"D")>;
+ def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"),
+ !cast<FPR64>("F"#I#"D")>;
}
// There's no store-multiple instruction for FPRs, so we're not fussy
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 4ca9292..c7ebb5d 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -25,6 +25,34 @@ SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
}
+// Decide whether it is best to use a loop or straight-line code for
+// a block operation of Size bytes with source address Src and destination
+// address Dest. Sequence is the opcode to use for straight-line code
+// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
+// Return the chain for the completed operation.
+static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
+ unsigned Loop, SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size) {
+ EVT PtrVT = Src.getValueType();
+ // The heuristic we use is to prefer loops for anything that would
+ // require 7 or more MVCs. With these kinds of sizes there isn't
+ // much to choose between straight-line code and looping code,
+ // since the time will be dominated by the MVCs themselves.
+ // However, the loop has 4 or 5 instructions (depending on whether
+ // the base addresses can be proved equal), so there doesn't seem
+ // much point using a loop for 5 * 256 bytes or fewer. Anything in
+ // the range (5 * 256, 6 * 256) will need another instruction after
+ // the loop, so it doesn't seem worth using a loop then either.
+ // The next value up, 6 * 256, can be implemented in the same
+ // number of straight-line MVCs as 6 * 256 - 1.
+ if (Size > 6 * 256)
+ return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(Size, PtrVT),
+ DAG.getConstant(Size / 256, PtrVT));
+ return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(Size, PtrVT));
+}
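To make the heuristic above concrete: MVC moves at most 256 bytes per instruction, so the Size > 6 * 256 test separates blocks that still fit in six straight-line MVCs from blocks that go through MVC_LOOP. A minimal sketch of the same decision (the helper name is illustrative and not part of this patch):

#include <cstdint>

// Mirrors the threshold used by emitMemMem; purely illustrative.
static bool useLoopForBlockOp(uint64_t Size) {
  // 6 * 256 = 1536 bytes can still be covered by six straight-line MVCs.
  return Size > 6 * 256;
}
// useLoopForBlockOp(1536) -> false: emit six MVCs inline.
// useLoopForBlockOp(1537) -> true:  emit MVC_LOOP with 1537 / 256 = 6 full
//                                   256-byte iterations, with the leftover
//                                   bytes handled after the loop.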
+
SDValue SystemZSelectionDAGInfo::
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
@@ -34,14 +62,9 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
if (IsVolatile)
return SDValue();
- if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
- uint64_t Bytes = CSize->getZExtValue();
- if (Bytes >= 1 && Bytes <= 0x100) {
- // A single MVC.
- return DAG.getNode(SystemZISD::MVC, DL, MVT::Other,
- Chain, Dst, Src, Size);
- }
- }
+ if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size))
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, Dst, Src, CSize->getZExtValue());
return SDValue();
}
@@ -65,7 +88,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Dst, SDValue Byte, SDValue Size,
unsigned Align, bool IsVolatile,
MachinePointerInfo DstPtrInfo) const {
- EVT DstVT = Dst.getValueType();
+ EVT PtrVT = Dst.getValueType();
if (IsVolatile)
return SDValue();
@@ -89,8 +112,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
Align, DstPtrInfo);
if (Size2 == 0)
return Chain1;
- Dst = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
- DAG.getConstant(Size1, DstVT));
+ Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(Size1, PtrVT));
DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
std::min(Align, Size1), DstPtrInfo);
@@ -103,8 +126,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
false, false, Align);
if (Bytes == 1)
return Chain1;
- SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
- DAG.getConstant(1, DstVT));
+ SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(1, PtrVT));
SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
DstPtrInfo.getWithOffset(1),
false, false, 1);
@@ -112,16 +135,159 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
}
}
assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
- if (Bytes <= 0x101) {
- // Copy the byte to the first location and then use MVC to copy
- // it to the rest.
- Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
- false, false, Align);
- SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
- DAG.getConstant(1, DstVT));
- return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst2, Dst,
- DAG.getConstant(Bytes - 1, MVT::i32));
- }
+
+ // Handle the special case of a memset of 0, which can use XC.
+ ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte);
+ if (CByte && CByte->getZExtValue() == 0)
+ return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
+ Chain, Dst, Dst, Bytes);
+
+ // Copy the byte to the first location and then use MVC to copy
+ // it to the rest.
+ Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
+ false, false, Align);
+ SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(1, PtrVT));
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, DstPlus1, Dst, Bytes - 1);
}
return SDValue();
}
+
+// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
+// deciding whether to use a loop or straight-line code.
+static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, uint64_t Size) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ EVT PtrVT = Src1.getValueType();
+ // A two-CLC sequence is a clear win over a loop, not least because it
+ // needs only one branch. A three-CLC sequence needs the same number
+ // of branches as a loop (i.e. 2), but is shorter. That brings us to
+ // lengths greater than 768 bytes. It seems relatively likely that
+ // a difference will be found within the first 768 bytes, so we just
+ // optimize for the smallest number of branch instructions, in order
+ // to avoid polluting the prediction buffer too much. A loop only ever
+ // needs 2 branches, whereas a straight-line sequence would need 3 or more.
+ if (Size > 3 * 256)
+ return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(Size, PtrVT),
+ DAG.getConstant(Size / 256, PtrVT));
+ return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(Size, PtrVT));
+}
+
+// Convert the current CC value into an integer that is 0 if CC == 0,
+// less than zero if CC == 1 and greater than zero if CC >= 2.
+// The sequence starts with IPM, which puts CC into bits 29 and 28
+// of an integer and clears bits 30 and 31.
+static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+ SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+ SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
+ DAG.getConstant(31, MVT::i32));
+ return ROTL;
+}
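Working through the sequence for the condition codes a CLC comparison can produce (a sketch of the arithmetic only): IPM leaves CC in bits 29-28, the SRL by SystemZ::IPM_CC brings it down to bits 1-0, and the ROTL by 31 is effectively a rotate right by one bit:

//   CC == 0: 0b00 -> rotate right by 1 -> 0x00000000  (result == 0)
//   CC == 1: 0b01 -> rotate right by 1 -> 0x80000000  (result <  0 as i32)
//   CC == 2: 0b10 -> rotate right by 1 -> 0x00000001  (result >  0)

which is exactly the sign behaviour memcmp and strcmp expect.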
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, SDValue Size,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
+ if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+ uint64_t Bytes = CSize->getZExtValue();
+ assert(Bytes > 0 && "Caller should have handled 0-size case");
+ Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
+ SDValue Glue = Chain.getValue(1);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
+ }
+ return std::make_pair(SDValue(), SDValue());
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Use SRST to find the character. End is its address on success.
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ Length = DAG.getZExtOrTrunc(Length, DL, PtrVT);
+ Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32);
+ Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char,
+ DAG.getConstant(255, MVT::i32));
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, Char);
+ Chain = End.getValue(1);
+ SDValue Glue = End.getValue(2);
+
+ // Now select between End and null, depending on whether the character
+ // was found.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(End);
+ Ops.push_back(DAG.getConstant(0, PtrVT));
+ Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32));
+ Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
+ Ops.push_back(Glue);
+ VTs = DAG.getVTList(PtrVT, MVT::Glue);
+ End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
+ return std::make_pair(End, Chain);
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dest, SDValue Src,
+ MachinePointerInfo DestPtrInfo,
+ MachinePointerInfo SrcPtrInfo, bool isStpcpy) const {
+ SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
+ SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
+ DAG.getConstant(0, MVT::i32));
+ return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
+ SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
+ SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(0, MVT::i32));
+ Chain = Unused.getValue(1);
+ SDValue Glue = Chain.getValue(2);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
+}
+
+// Search from Src for a null character, stopping once Src reaches Limit.
+// Return a pair of values, the first being the number of nonnull characters
+// and the second being the out chain.
+//
+// This can be used for strlen by setting Limit to 0.
+static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
+ SDValue Chain, SDValue Src,
+ SDValue Limit) {
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, DAG.getConstant(0, MVT::i32));
+ Chain = End.getValue(1);
+ SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
+ return std::make_pair(Len, Chain);
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, MachinePointerInfo SrcPtrInfo) const {
+ EVT PtrVT = Src.getValueType();
+ return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT));
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue MaxLength,
+ MachinePointerInfo SrcPtrInfo) const {
+ EVT PtrVT = Src.getValueType();
+ MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT);
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength);
+ return getBoundedStrlen(DAG, DL, Chain, Src, Limit);
+}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 9138a9c..281d1e2 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -38,7 +38,41 @@ public:
EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL,
SDValue Chain, SDValue Dst, SDValue Byte,
SDValue Size, unsigned Align, bool IsVolatile,
- MachinePointerInfo DstPtrInfo) const;
+ MachinePointerInfo DstPtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, SDValue Size,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dest, SDValue Src,
+ MachinePointerInfo DestPtrInfo,
+ MachinePointerInfo SrcPtrInfo,
+ bool isStpcpy) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, MachinePointerInfo SrcPtrInfo) const
+ LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue MaxLength,
+ MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE;
};
}
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
new file mode 100644
index 0000000..537a545
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -0,0 +1,163 @@
+//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to replace instructions with shorter forms. For example,
+// IILF can be replaced with LLILL or LLILH if the constant fits and if the
+// other 32 bits of the GR64 destination are not live.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-shorten-inst"
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+ class SystemZShortenInst : public MachineFunctionPass {
+ public:
+ static char ID;
+ SystemZShortenInst(const SystemZTargetMachine &tm);
+
+ virtual const char *getPassName() const {
+ return "SystemZ Instruction Shortening";
+ }
+
+ bool processBlock(MachineBasicBlock *MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
+ unsigned LLIxL, unsigned LLIxH);
+
+ const SystemZInstrInfo *TII;
+
+ // LowGPRs[I] has bit N set if LLVM register I includes the low
+ // word of GPR N. HighGPRs is the same for the high word.
+ unsigned LowGPRs[SystemZ::NUM_TARGET_REGS];
+ unsigned HighGPRs[SystemZ::NUM_TARGET_REGS];
+ };
+
+ char SystemZShortenInst::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
+ return new SystemZShortenInst(TM);
+}
+
+SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(0), LowGPRs(), HighGPRs() {
+ // Set up LowGPRs and HighGPRs.
+ for (unsigned I = 0; I < 16; ++I) {
+ LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I;
+ LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
+ HighGPRs[SystemZMC::GRH32Regs[I]] |= 1 << I;
+ HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
+ if (unsigned GR128 = SystemZMC::GR128Regs[I]) {
+ LowGPRs[GR128] |= 3 << I;
+ HighGPRs[GR128] |= 3 << I;
+ }
+ }
+}
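For example, with the register numbering established earlier in this patch (values shown only for illustration): GPR 3 contributes bit 3, so LowGPRs picks it up from both R3L and R3D, HighGPRs picks it up from both R3H and R3D, and the containing pair R2Q records bits 2 and 3 (3 << 2) in both maps, since that 128-bit register overlaps GPRs 2 and 3.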
+
+// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH
+// are the halfword immediate loads for the same word. Try to use one of them
+// instead of IIxF. If MI loads the high word, GPRMap[X] is the set of high
+// words referenced by LLVM register X while LiveOther is the mask of low
+// words that are currently live, and vice versa.
+bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
+ unsigned LiveOther, unsigned LLIxL,
+ unsigned LLIxH) {
+ unsigned Reg = MI.getOperand(0).getReg();
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ unsigned GPRs = GPRMap[Reg];
+ assert(GPRs != 0 && "Register must be a GPR");
+ if (GPRs & LiveOther)
+ return false;
+
+ uint64_t Imm = MI.getOperand(1).getImm();
+ if (SystemZ::isImmLL(Imm)) {
+ MI.setDesc(TII->get(LLIxL));
+ MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg));
+ return true;
+ }
+ if (SystemZ::isImmLH(Imm)) {
+ MI.setDesc(TII->get(LLIxH));
+ MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg));
+ MI.getOperand(1).setImm(Imm >> 16);
+ return true;
+ }
+ return false;
+}
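A sketch of the rewrite this enables (registers and immediates purely illustrative):

//   iilf %r2, 0x1234      ->  llill %r2, 0x1234   (isImmLL: fits in the low halfword)
//   iilf %r2, 0x12340000  ->  llilh %r2, 0x1234   (isImmLH: immediate shifted right by 16)
// The LLILL/LLILH forms zero the rest of the 64-bit register, which is why the
// transformation is only applied once the other word has been proved dead.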
+
+// Process all instructions in MBB. Return true if something changed.
+bool SystemZShortenInst::processBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ // Work out which words are live on exit from the block.
+ unsigned LiveLow = 0;
+ unsigned LiveHigh = 0;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
+ LE = (*SI)->livein_end(); LI != LE; ++LI) {
+ unsigned Reg = *LI;
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ LiveLow |= LowGPRs[Reg];
+ LiveHigh |= HighGPRs[Reg];
+ }
+ }
+
+ // Iterate backwards through the block looking for instructions to change.
+ for (MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(),
+ MBBE = MBB->rend(); MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == SystemZ::IILF)
+ Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
+ SystemZ::LLILH);
+ else if (Opcode == SystemZ::IIHF)
+ Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
+ SystemZ::LLIHH);
+ unsigned UsedLow = 0;
+ unsigned UsedHigh = 0;
+ for (MachineInstr::mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (MO.isReg()) {
+ if (unsigned Reg = MO.getReg()) {
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ if (MO.isDef()) {
+ LiveLow &= ~LowGPRs[Reg];
+ LiveHigh &= ~HighGPRs[Reg];
+ } else if (!MO.isUndef()) {
+ UsedLow |= LowGPRs[Reg];
+ UsedHigh |= HighGPRs[Reg];
+ }
+ }
+ }
+ }
+ LiveLow |= UsedLow;
+ LiveHigh |= UsedHigh;
+ }
+
+ return Changed;
+}
+
+bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
+ MFI != MFE; ++MFI)
+ Changed |= processBlock(MFI);
+
+ return Changed;
+}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 036ec05..3971d5e 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -9,6 +9,7 @@
#include "SystemZSubtarget.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Host.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -17,14 +18,22 @@
using namespace llvm;
+// Pin the vtable to this file.
+void SystemZSubtarget::anchor() {}
+
SystemZSubtarget::SystemZSubtarget(const std::string &TT,
const std::string &CPU,
const std::string &FS)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
- HasLoadStoreOnCond(false), HasHighWord(false), TargetTriple(TT) {
+ HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
+ TargetTriple(TT) {
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "z10";
+ CPUName = "generic";
+#if defined(__linux__) && defined(__s390x__)
+ if (CPUName == "generic")
+ CPUName = sys::getHostCPUName();
+#endif
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 4efb58d..5817491 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -26,10 +26,12 @@ class GlobalValue;
class StringRef;
class SystemZSubtarget : public SystemZGenSubtargetInfo {
+ virtual void anchor();
protected:
bool HasDistinctOps;
bool HasLoadStoreOnCond;
bool HasHighWord;
+ bool HasFPExtension;
private:
Triple TargetTriple;
@@ -38,6 +40,9 @@ public:
SystemZSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS);
+ // This is important for reducing register pressure in vector code.
+ virtual bool useAA() const LLVM_OVERRIDE { return true; }
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -50,6 +55,9 @@ public:
// Return true if the target has the high-word facility.
bool hasHighWord() const { return HasHighWord; }
+ // Return true if the target has the floating-point extension facility.
+ bool hasFPExtension() const { return HasFPExtension; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 856183c..dee92e9 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -10,6 +10,7 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -47,12 +48,18 @@ public:
return getTM<SystemZTargetMachine>();
}
+ virtual void addIRPasses() LLVM_OVERRIDE;
virtual bool addInstSelector() LLVM_OVERRIDE;
virtual bool addPreSched2() LLVM_OVERRIDE;
virtual bool addPreEmitPass() LLVM_OVERRIDE;
};
} // end anonymous namespace
+void SystemZPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ addPass(createPartiallyInlineLibCallsPass());
+}
+
bool SystemZPassConfig::addInstSelector() {
addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
return false;
@@ -90,6 +97,8 @@ bool SystemZPassConfig::addPreEmitPass() {
// preventing that would be a win or not.
if (getOptLevel() != CodeGenOpt::None)
addPass(createSystemZElimComparePass(getSystemZTargetMachine()));
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZShortenInstPass(getSystemZTargetMachine()));
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
return true;
}
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 3d92f29..2190198 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -88,6 +88,14 @@ LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) {
return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS));
}
+LLVMTypeRef LLVMIntPtrTypeInContext(LLVMContextRef C, LLVMTargetDataRef TD) {
+ return wrap(unwrap(TD)->getIntPtrType(*unwrap(C)));
+}
+
+LLVMTypeRef LLVMIntPtrTypeForASInContext(LLVMContextRef C, LLVMTargetDataRef TD, unsigned AS) {
+ return wrap(unwrap(TD)->getIntPtrType(*unwrap(C), AS));
+}
+
unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
}
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 8696b57..3e68fe1 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -38,6 +38,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"_ZnwjRKSt9nothrow_t",
"_Znwm",
"_ZnwmRKSt9nothrow_t",
+ "__cospi",
+ "__cospif",
"__cxa_atexit",
"__cxa_guard_abort",
"__cxa_guard_acquire",
@@ -45,6 +47,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__isoc99_scanf",
"__isoc99_sscanf",
"__memcpy_chk",
+ "__sincospi_stret",
+ "__sincospi_stretf",
+ "__sinpi",
+ "__sinpif",
"__sqrt_finite",
"__sqrtf_finite",
"__sqrtl_finite",
@@ -331,6 +337,24 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"write"
};
+static bool hasSinCosPiStret(const Triple &T) {
+ // Only Darwin variants have _stret versions of combined trig functions.
+ if (!T.isMacOSX() && T.getOS() != Triple::IOS)
+ return false;
+
+ // The ABI is rather complicated on x86, so don't do anything special there.
+ if (T.getArch() == Triple::x86)
+ return false;
+
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
+ return false;
+
+ if (T.getOS() == Triple::IOS && T.isOSVersionLT(7, 0))
+ return false;
+
+ return true;
+}
+
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
@@ -350,13 +374,22 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
if (T.isMacOSX()) {
if (T.isMacOSXVersionLT(10, 5))
TLI.setUnavailable(LibFunc::memset_pattern16);
- } else if (T.getOS() == Triple::IOS) {
+ } else if (T.isiOS()) {
if (T.isOSVersionLT(3, 0))
TLI.setUnavailable(LibFunc::memset_pattern16);
} else {
TLI.setUnavailable(LibFunc::memset_pattern16);
}
+ if (!hasSinCosPiStret(T)) {
+ TLI.setUnavailable(LibFunc::sinpi);
+ TLI.setUnavailable(LibFunc::sinpif);
+ TLI.setUnavailable(LibFunc::cospi);
+ TLI.setUnavailable(LibFunc::cospif);
+ TLI.setUnavailable(LibFunc::sincospi_stret);
+ TLI.setUnavailable(LibFunc::sincospi_stretf);
+ }
+
if (T.isMacOSX() && T.getArch() == Triple::x86 &&
!T.isMacOSXVersionLT(10, 7)) {
// x86-32 OSX has a scheme where fwrite and fputs (and some other functions
@@ -562,7 +595,7 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
}
// The following functions are available on at least Linux:
- if (T.getOS() != Triple::Linux) {
+ if (!T.isOSLinux()) {
TLI.setUnavailable(LibFunc::dunder_strdup);
TLI.setUnavailable(LibFunc::dunder_strtok_r);
TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index cd810b6..7b8d110 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -97,10 +97,20 @@ static bool IsNullTerminatedString(const Constant *C) {
return false;
}
+/// Return the MCSymbol for the specified global value. This
+/// symbol is the main label that is the address of the global.
+MCSymbol *TargetLoweringObjectFile::getSymbol(Mangler &M,
+ const GlobalValue *GV) const {
+ SmallString<60> NameStr;
+ M.getNameWithPrefix(NameStr, GV, false);
+ return Ctx->GetOrCreateSymbol(NameStr.str());
+}
+
+
MCSymbol *TargetLoweringObjectFile::
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
- return Mang->getSymbol(GV);
+ return getSymbol(*Mang, GV);
}
void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
@@ -293,7 +303,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const {
const MCSymbolRefExpr *Ref =
- MCSymbolRefExpr::Create(Mang->getSymbol(GV), getContext());
+ MCSymbolRefExpr::Create(getSymbol(*Mang, GV), getContext());
return getTTypeReference(Ref, Encoding, Streamer);
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index df4a03c..cb42e83 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -164,6 +164,11 @@ CodeGenOpt::Level TargetMachine::getOptLevel() const {
return CodeGenInfo->getOptLevel();
}
+void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const {
+ if (CodeGenInfo)
+ CodeGenInfo->setOptLevel(Level);
+}
+
bool TargetMachine::getAsmVerbosityDefault() {
return AsmVerbosityDefault;
}
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 7419122..3d5f827 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdlib>
@@ -60,13 +61,44 @@ inline LLVMTargetRef wrap(const Target * P) {
}
LLVMTargetRef LLVMGetFirstTarget() {
- const Target* target = &*TargetRegistry::begin();
- return wrap(target);
+ if(TargetRegistry::begin() == TargetRegistry::end()) {
+ return NULL;
+ }
+
+ const Target* target = &*TargetRegistry::begin();
+ return wrap(target);
}
LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) {
return wrap(unwrap(T)->getNext());
}
+LLVMTargetRef LLVMGetTargetFromName(const char *Name) {
+ StringRef NameRef = Name;
+ for (TargetRegistry::iterator IT = TargetRegistry::begin(),
+ IE = TargetRegistry::end(); IT != IE; ++IT) {
+ if (IT->getName() == NameRef)
+ return wrap(&*IT);
+ }
+
+ return NULL;
+}
+
+LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T,
+ char **ErrorMessage) {
+ std::string Error;
+
+ *T = wrap(TargetRegistry::lookupTarget(TripleStr, Error));
+
+ if (!*T) {
+ if (ErrorMessage)
+ *ErrorMessage = strdup(Error.c_str());
+
+ return 1;
+ }
+
+ return 0;
+}
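A minimal caller of the new C API entry point might look like this (illustrative only; it assumes the relevant targets were registered beforehand, e.g. through LLVMInitializeAllTargetInfos()):

#include <llvm-c/TargetMachine.h>
#include <stdio.h>
#include <stdlib.h>

// Look up a target by triple and report any error; the helper name is
// arbitrary and not part of the patch.
static LLVMTargetRef lookupTarget(const char *Triple) {
  LLVMTargetRef T;
  char *Error = NULL;
  if (LLVMGetTargetFromTriple(Triple, &T, &Error)) {
    fprintf(stderr, "no target for %s: %s\n", Triple, Error);
    free(Error); /* the message is strdup'd above, so plain free() releases it */
    return NULL;
  }
  return T;
}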
+
const char * LLVMGetTargetName(LLVMTargetRef T) {
return unwrap(T)->getName();
}
@@ -87,9 +119,10 @@ LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
return unwrap(T)->hasMCAsmBackend();
}
-LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple,
- char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
- LLVMCodeModel CodeModel) {
+LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
+ const char* Triple, const char* CPU, const char* Features,
+ LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
+ LLVMCodeModel CodeModel) {
Reloc::Model RM;
switch (Reloc){
case LLVMRelocStatic:
@@ -158,6 +191,11 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
return wrap(unwrap(T)->getDataLayout());
}
+void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
+ LLVMBool VerboseAsm) {
+ unwrap(T)->setAsmVerbosityDefault(VerboseAsm);
+}
+
static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) {
TargetMachine* TM = unwrap(T);
@@ -201,11 +239,11 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
std::string error;
raw_fd_ostream dest(Filename, error, sys::fs::F_Binary);
- formatted_raw_ostream destf(dest);
if (!error.empty()) {
*ErrorMessage = strdup(error.c_str());
return true;
}
+ formatted_raw_ostream destf(dest);
bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage);
dest.flush();
return Result;
@@ -225,3 +263,7 @@ LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
Data.length(), "");
return Result;
}
+
+char *LLVMGetDefaultTargetTriple(void) {
+ return strdup(sys::getDefaultTargetTriple().c_str());
+}
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index af0cef6..10e8db5 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
@@ -22,6 +23,21 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
+// Temporary option to compare overall performance change when moving from the
+// SD scheduler to the MachineScheduler pass pipeline. It should be removed
+// before 3.4. The normal way to enable/disable the MachineScheduling pass
+// itself is by using -enable-misched. For targets that already use MI sched
+// (via MySubTarget::enableMachineScheduler()) -misched-bench=false negates the
+// subtarget hook.
+static cl::opt<bool> BenchMachineSched("misched-bench", cl::Hidden,
+ cl::desc("Migrate from the target's default SD scheduler to MI scheduler"));
+
+bool TargetSubtargetInfo::useMachineScheduler() const {
+ if (BenchMachineSched.getNumOccurrences())
+ return BenchMachineSched;
+ return enableMachineScheduler();
+}
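For instance, on a target whose subtarget hook already returns true, passing -misched-bench=false to llc flips a comparison run back to the SelectionDAG scheduler, while -misched-bench=true requests the MachineScheduler path on a target that has not opted in; leaving the option unset defers to enableMachineScheduler() as before.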
+
bool TargetSubtargetInfo::enableMachineScheduler() const {
return false;
}
@@ -35,3 +51,6 @@ bool TargetSubtargetInfo::enablePostRAScheduler(
return false;
}
+bool TargetSubtargetInfo::useAA() const {
+ return false;
+}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ad83d97..bc8f367 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -80,7 +80,7 @@ private:
PostfixStack.push_back(std::make_pair(Op, Val));
}
- void popOperator() { InfixOperatorStack.pop_back_val(); }
+ void popOperator() { InfixOperatorStack.pop_back(); }
void pushOperator(InfixCalculatorTok Op) {
// Push the new operator if the stack is empty.
if (InfixOperatorStack.empty()) {
@@ -118,12 +118,12 @@ private:
if (StackOp == IC_RPAREN) {
++ParenCount;
- InfixOperatorStack.pop_back_val();
+ InfixOperatorStack.pop_back();
} else if (StackOp == IC_LPAREN) {
--ParenCount;
- InfixOperatorStack.pop_back_val();
+ InfixOperatorStack.pop_back();
} else {
- InfixOperatorStack.pop_back_val();
+ InfixOperatorStack.pop_back();
PostfixStack.push_back(std::make_pair(StackOp, 0));
}
}
@@ -495,16 +495,17 @@ private:
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
X86Operand *ParseIntelOffsetOfOperator();
- X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
+ bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
X86Operand *ParseIntelOperator(unsigned OpKind);
- X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp,
- SMLoc StartLoc);
- X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
+ X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
+ X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
+ unsigned Size);
+ bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
int64_t ImmDisp, unsigned Size);
- X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
- InlineAsmIdentifierInfo &Info,
- bool IsUnevaluatedOperand, SMLoc &End);
+ bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
+ InlineAsmIdentifierInfo &Info,
+ bool IsUnevaluatedOperand, SMLoc &End);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
@@ -555,8 +556,9 @@ private:
/// }
public:
- X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
+ X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -814,6 +816,9 @@ struct X86Operand : public MCParsedAsmOperand {
bool isMem256() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 256);
}
+ bool isMem512() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 512);
+ }
bool isMemVX32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
@@ -840,17 +845,36 @@ struct X86Operand : public MCParsedAsmOperand {
getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
}
- bool isMem512() const {
- return Kind == Memory && (!Mem.Size || Mem.Size == 512);
- }
-
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1;
}
+ bool isMemOffs8() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8);
+ }
+ bool isMemOffs16() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16);
+ }
+ bool isMemOffs32() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32);
+ }
+ bool isMemOffs64() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64);
+ }
+
bool isReg() const { return Kind == Register; }
+ bool isGR32orGR64() const {
+ return Kind == Register &&
+ (X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
@@ -864,53 +888,40 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
- void addImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ static unsigned getGR32FromGR64(unsigned RegNo) {
+ switch (RegNo) {
+ default: llvm_unreachable("Unexpected register");
+ case X86::RAX: return X86::EAX;
+ case X86::RCX: return X86::ECX;
+ case X86::RDX: return X86::EDX;
+ case X86::RBX: return X86::EBX;
+ case X86::RBP: return X86::EBP;
+ case X86::RSP: return X86::ESP;
+ case X86::RSI: return X86::ESI;
+ case X86::RDI: return X86::EDI;
+ case X86::R8: return X86::R8D;
+ case X86::R9: return X86::R9D;
+ case X86::R10: return X86::R10D;
+ case X86::R11: return X86::R11D;
+ case X86::R12: return X86::R12D;
+ case X86::R13: return X86::R13D;
+ case X86::R14: return X86::R14D;
+ case X86::R15: return X86::R15D;
+ case X86::RIP: return X86::EIP;
+ }
}
- void addMem8Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem16Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem80Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem128Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem256Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVX32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVY32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVX64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVY64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addGR32orGR64Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned RegNo = getReg();
+ if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
+ RegNo = getGR32FromGR64(RegNo);
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
}
- void addMemVZ32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVZ64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem512Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
}
void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -931,6 +942,15 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
+ void addMemOffsOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 1) && "Invalid number of operands!");
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ }
+
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
@@ -1249,8 +1269,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
}
}
-X86Operand *
-X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
+bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
const AsmToken &Tok = Parser.getTok();
bool Done = false;
@@ -1272,7 +1291,7 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
Done = true;
break;
}
- return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ return Error(Tok.getLoc(), "unknown token in expression");
}
case AsmToken::EndOfStatement: {
Done = true;
@@ -1291,18 +1310,18 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
} else {
if (!isParsingInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End))
- return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ return Error(Tok.getLoc(), "Unexpected identifier!");
} else {
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
- if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated*/ false, End))
- return Err;
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return true;
}
SM.onIdentifierExpr(Val, Identifier);
UpdateLocLex = false;
break;
}
- return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ return Error(Tok.getLoc(), "Unexpected identifier!");
}
case AsmToken::Integer:
if (isParsingInlineAsm() && SM.getAddImmPrefix())
@@ -1320,14 +1339,14 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
case AsmToken::RParen: SM.onRParen(); break;
}
if (SM.hadError())
- return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ return Error(Tok.getLoc(), "unknown token in expression");
if (!Done && UpdateLocLex) {
End = Tok.getLoc();
Parser.Lex(); // Consume the token.
}
}
- return 0;
+ return false;
}
X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
@@ -1344,8 +1363,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// may have already parsed an immediate displacement before the bracketed
// expression.
IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
- if (X86Operand *Err = ParseIntelExpression(SM, End))
- return Err;
+ if (ParseIntelExpression(SM, End))
+ return 0;
const MCExpr *Disp;
if (const MCExpr *Sym = SM.getSym()) {
@@ -1363,8 +1382,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// Parse the dot operator (e.g., [ebx].foo.bar).
if (Tok.getString().startswith(".")) {
const MCExpr *NewDisp;
- if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp))
- return Err;
+ if (ParseIntelDotOperator(Disp, NewDisp))
+ return 0;
End = Tok.getEndLoc();
Parser.Lex(); // Eat the field.
@@ -1392,11 +1411,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
}
// Inline assembly may use variable names with namespace alias qualifiers.
-X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
- StringRef &Identifier,
- InlineAsmIdentifierInfo &Info,
- bool IsUnevaluatedOperand,
- SMLoc &End) {
+bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
+ StringRef &Identifier,
+ InlineAsmIdentifierInfo &Info,
+ bool IsUnevaluatedOperand, SMLoc &End) {
assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = 0;
@@ -1421,68 +1439,89 @@ X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
- return 0;
+ return false;
}
-/// ParseIntelMemOperand - Parse intel style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
- int64_t ImmDisp,
- SMLoc Start) {
- const AsmToken &Tok = Parser.getTok();
- SMLoc End;
-
- unsigned Size = getIntelMemOperandSize(Tok.getString());
- if (Size) {
- Parser.Lex(); // Eat operand size (e.g., byte, word).
- if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
- return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
- Parser.Lex(); // Eat ptr.
- }
-
- // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+/// \brief Parse intel style segment override.
+X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
+ SMLoc Start,
+ unsigned Size) {
+ assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
+ const AsmToken &Tok = Parser.getTok(); // Eat colon.
+ if (Tok.isNot(AsmToken::Colon))
+ return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
+ Parser.Lex(); // Eat ':'
+
+ int64_t ImmDisp = 0;
if (getLexer().is(AsmToken::Integer)) {
+ ImmDisp = Tok.getIntVal();
+ AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
+
if (isParsingInlineAsm())
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
- Tok.getLoc()));
- int64_t ImmDisp = Tok.getIntVal();
- Parser.Lex(); // Eat the integer.
- if (getLexer().isNot(AsmToken::LBrac))
- return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
+ InstInfo->AsmRewrites->push_back(
+ AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
+
+ if (getLexer().isNot(AsmToken::LBrac)) {
+ // An immediate following a 'segment register', 'colon' token sequence can
+ // be followed by a bracketed expression. If it isn't we know we have our
+ // final segment override.
+ const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
+ return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
+ /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
+ Size);
+ }
}
if (getLexer().is(AsmToken::LBrac))
return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
- if (!ParseRegister(SegReg, Start, End)) {
- // Handel SegReg : [ ... ]
- if (getLexer().isNot(AsmToken::Colon))
- return ErrorOperand(Start, "Expected ':' token!");
- Parser.Lex(); // Eat :
- if (getLexer().isNot(AsmToken::LBrac))
- return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
+ const MCExpr *Val;
+ SMLoc End;
+ if (!isParsingInlineAsm()) {
+ if (getParser().parsePrimaryExpr(Val, End))
+ return ErrorOperand(Tok.getLoc(), "unknown token in expression");
+
+ return X86Operand::CreateMem(Val, Start, End, Size);
}
+ InlineAsmIdentifierInfo Info;
+ StringRef Identifier = Tok.getString();
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return 0;
+ return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
+ /*Scale=*/1, Start, End, Size, Identifier, Info);
+}
+
+/// ParseIntelMemOperand - Parse intel style memory operand.
+X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
+ unsigned Size) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc End;
+
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (getLexer().is(AsmToken::LBrac))
+ return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
+
const MCExpr *Val;
if (!isParsingInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End))
- return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ return ErrorOperand(Tok.getLoc(), "unknown token in expression");
return X86Operand::CreateMem(Val, Start, End, Size);
}
InlineAsmIdentifierInfo Info;
StringRef Identifier = Tok.getString();
- if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated*/ false, End))
- return Err;
- return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return 0;
+ return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
/*Scale=*/1, Start, End, Size, Identifier, Info);
}
/// Parse the '.' operator.
-X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
+bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
const MCExpr *&NewDisp) {
const AsmToken &Tok = Parser.getTok();
int64_t OrigDispVal, DotDispVal;
@@ -1491,7 +1530,7 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
OrigDispVal = OrigDisp->getValue();
else
- return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");
+ return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
// Drop the '.'.
StringRef DotDispStr = Tok.getString().drop_front(1);
@@ -1506,10 +1545,10 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
DotDisp))
- return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
+ return Error(Tok.getLoc(), "Unable to lookup field reference!");
DotDispVal = DotDisp;
} else
- return ErrorOperand(Tok.getLoc(), "Unexpected token type!");
+ return Error(Tok.getLoc(), "Unexpected token type!");
if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
@@ -1520,7 +1559,7 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
}
NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
- return 0;
+ return false;
}
/// Parse the 'offset' operator. This operator is used to specify the
@@ -1534,9 +1573,9 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
- if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated*/ false, End))
- return Err;
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return 0;
// Don't emit the offset operator.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -1570,9 +1609,12 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
- if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated*/ true, End))
- return Err;
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/true, End))
+ return 0;
+
+ if (!Info.OpDecl)
+ return ErrorOperand(Start, "unable to lookup expression");
unsigned CVal = 0;
switch(OpKind) {
@@ -1593,7 +1635,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
X86Operand *X86AsmParser::ParseIntelOperand() {
const AsmToken &Tok = Parser.getTok();
- SMLoc Start = Tok.getLoc(), End;
+ SMLoc Start, End;
// Offset, length, type and size operators.
if (isParsingInlineAsm()) {
@@ -1608,14 +1650,23 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
return ParseIntelOperator(IOK_TYPE);
}
+ unsigned Size = getIntelMemOperandSize(Tok.getString());
+ if (Size) {
+ Parser.Lex(); // Eat operand size (e.g., byte, word).
+ if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
+ return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
+ Parser.Lex(); // Eat ptr.
+ }
+ Start = Tok.getLoc();
+
// Immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
getLexer().is(AsmToken::LParen)) {
AsmToken StartTok = Tok;
IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
/*AddImmPrefix=*/false);
- if (X86Operand *Err = ParseIntelExpression(SM, End))
- return Err;
+ if (ParseIntelExpression(SM, End))
+ return 0;
int64_t Imm = SM.getImm();
if (isParsingInlineAsm()) {
@@ -1639,23 +1690,22 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
"before bracketed expr.");
// Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
- return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start);
+ return ParseIntelMemOperand(Imm, Start, Size);
}
// Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
- // of a memory reference, otherwise this is a normal register reference.
+ // of a segment override, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
- getParser().Lex(); // Eat the colon.
- return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
+ return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
}
// Memory operand.
- return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
+ return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
}
X86Operand *X86AsmParser::ParseATTOperand() {
@@ -1967,6 +2017,47 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
}
}
+ if (STI.getFeatureBits() & X86::FeatureAVX512) {
+ // Parse mask register {%k1}
+ if (getLexer().is(AsmToken::LCurly)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("{", Loc));
+ Parser.Lex(); // Eat the {
+ if (X86Operand *Op = ParseOperand()) {
+ Operands.push_back(Op);
+ if (!getLexer().is(AsmToken::RCurly)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "Expected } at this point");
+ }
+ Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("}", Loc));
+ Parser.Lex(); // Eat the }
+ } else {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+ }
+ // Parse "zeroing non-masked" semantic {z}
+ if (getLexer().is(AsmToken::LCurly)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("{z}", Loc));
+ Parser.Lex(); // Eat the {
+ if (!getLexer().is(AsmToken::Identifier) || getLexer().getTok().getIdentifier() != "z") {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "Expected z at this point");
+ }
+ Parser.Lex(); // Eat the z
+ if (!getLexer().is(AsmToken::RCurly)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "Expected } at this point");
+ }
+ Parser.Lex(); // Eat the }
+ }
+ }
+
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
@@ -2218,6 +2309,55 @@ processInstruction(MCInst &Inst,
case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
+ !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
+ return false;
+
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ Inst.setOpcode(NewOpc);
+ return true;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
+ !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
+ return false;
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ Inst.setOpcode(NewOpc);
+ return true;
+ }
}
}
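Note on the new VMOV* register-to-register cases above: the swap to the _REV (store) form is only taken when the destination is not an x86-64 extended register but the source is. The likely motivation is encoding size: in the reversed form the extended register moves out of ModRM.rm (whose extension bit, VEX.B, exists only in the 3-byte VEX prefix) and into ModRM.reg (covered by VEX.R, which the 2-byte prefix also carries). A minimal sketch of just that operand test, using plain register numbers as a hypothetical stand-in for MCInst operands:

    #include <cassert>

    // Stand-in for X86II::isX86_64ExtendedReg: the register needs a fourth
    // register-number bit, e.g. XMM8-XMM15 or YMM8-YMM15.
    static bool isExtended(unsigned RegNum) { return RegNum >= 8; }

    // Mirror of the two-operand check above: prefer the reversed (store) form
    // only when the destination fits in three bits but the source does not, so
    // the extended register lands in ModRM.reg and the shorter 2-byte VEX
    // prefix can still be used.
    static bool shouldUseReversedMove(unsigned DstReg, unsigned SrcReg) {
      return !isExtended(DstReg) && isExtended(SrcReg);
    }

    int main() {
      assert(shouldUseReversedMove(1, 9));   // vmovaps %xmm9, %xmm1: swapping pays off
      assert(!shouldUseReversedMove(9, 1));  // destination already covered by VEX.R; keep the normal form
      assert(!shouldUseReversedMove(9, 10)); // both extended: the 3-byte prefix is needed either way
      return 0;
    }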
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 82af6fa..903e36c 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -231,16 +231,18 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
default:
break;
case 1:
- type = TYPE_MOFFS8;
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
break;
case 2:
- type = TYPE_MOFFS16;
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
break;
case 4:
- type = TYPE_MOFFS32;
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
break;
case 8:
- type = TYPE_MOFFS64;
break;
}
}
@@ -263,16 +265,18 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
Opcode != X86::VINSERTPSrr)
- type = TYPE_MOFFS8;
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
break;
case ENCODING_IW:
- type = TYPE_MOFFS16;
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
break;
case ENCODING_ID:
- type = TYPE_MOFFS32;
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
break;
case ENCODING_IO:
- type = TYPE_MOFFS64;
break;
}
}
@@ -292,30 +296,21 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_REL8:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- // fall through to sign extend the immediate if needed.
- case TYPE_MOFFS8:
if(immediate & 0x80)
immediate |= ~(0xffull);
break;
- case TYPE_MOFFS16:
- if(immediate & 0x8000)
- immediate |= ~(0xffffull);
- break;
case TYPE_REL32:
case TYPE_REL64:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- // fall through to sign extend the immediate if needed.
- case TYPE_MOFFS32:
if(immediate & 0x80000000)
immediate |= ~(0xffffffffull);
break;
- case TYPE_MOFFS64:
default:
// operand is 64 bits wide. Do nothing.
break;
}
-
+
if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
insn.immediateOffset, insn.immediateSize,
mcInst, Dis))
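The translateImmediate() hunks above drop the TYPE_MOFFS* retagging and instead sign-extend the immediate in place according to its encoded width. A self-contained sketch of that widening step, assuming nothing beyond the bit manipulation the three cases repeat (this is not the disassembler's API):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low `size` bytes of `imm` to the full 64 bits, mirroring
    // the `immediate |= ~(0xffull)` pattern used in translateImmediate().
    static uint64_t signExtendImmediate(uint64_t imm, unsigned size) {
      switch (size) {
      case 1: if (imm & 0x80)       imm |= ~0xffull;       break;
      case 2: if (imm & 0x8000)     imm |= ~0xffffull;     break;
      case 4: if (imm & 0x80000000) imm |= ~0xffffffffull; break;
      default: break; // 8-byte immediates are already full width
      }
      return imm;
    }

    int main() {
      assert(signExtendImmediate(0xfe, 1) == 0xfffffffffffffffeULL);   // -2 as imm8
      assert(signExtendImmediate(0x7f, 1) == 0x7fULL);                 // positive imm8 is unchanged
      assert(signExtendImmediate(0x8000, 2) == 0xffffffffffff8000ULL); // -32768 as imm16
      return 0;
    }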
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index bb195ee..c81a857 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -25,8 +25,6 @@
#define TRUE 1
#define FALSE 0
-typedef int8_t bool;
-
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
@@ -81,6 +79,15 @@ static int modRMRequired(OpcodeType type,
case THREEBYTE_A7:
decision = &THREEBYTEA7_SYM;
break;
+ case XOP8_MAP:
+ decision = &XOP8_MAP_SYM;
+ break;
+ case XOP9_MAP:
+ decision = &XOP9_MAP_SYM;
+ break;
+ case XOPA_MAP:
+ decision = &XOPA_MAP_SYM;
+ break;
}
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
@@ -122,6 +129,15 @@ static InstrUID decode(OpcodeType type,
case THREEBYTE_A7:
dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case XOP8_MAP:
+ dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case XOP9_MAP:
+ dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case XOPA_MAP:
+ dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
@@ -305,6 +321,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
uint8_t byte = 0;
+ uint8_t nextByte;
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
@@ -314,20 +331,21 @@ static int readPrefixes(struct InternalInstruction* insn) {
while (isPrefix) {
prefixLocation = insn->readerCursor;
+ /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
if (consumeByte(insn, &byte))
- return -1;
+ break;
/*
* If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
* break and let it be disassembled as a normal "instruction".
*/
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+ break;
+
if (insn->readerCursor - 1 == insn->startLocation
- && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
- uint8_t nextByte;
- if (byte == 0xf0)
- break;
- if (lookAtByte(insn, &nextByte))
- return -1;
+ && (byte == 0xf2 || byte == 0xf3)
+ && !lookAtByte(insn, &nextByte))
+ {
/*
* If the byte is 0xf2 or 0xf3, and any of the following conditions are
* met:
@@ -426,7 +444,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
- insn->vexSize = 0;
+ insn->vexXopType = TYPE_NO_VEX_XOP;
if (byte == 0xc4) {
uint8_t byte1;
@@ -437,7 +455,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
- insn->vexSize = 3;
+ insn->vexXopType = TYPE_VEX_3B;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
else {
@@ -445,22 +463,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
- if (insn->vexSize == 3) {
- insn->vexPrefix[0] = byte;
- consumeByte(insn, &insn->vexPrefix[1]);
- consumeByte(insn, &insn->vexPrefix[2]);
+ if (insn->vexXopType == TYPE_VEX_3B) {
+ insn->vexXopPrefix[0] = byte;
+ consumeByte(insn, &insn->vexXopPrefix[1]);
+ consumeByte(insn, &insn->vexXopPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
if (insn->mode == MODE_64BIT) {
insn->rexPrefix = 0x40
- | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
- | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
- | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
- | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+ | (wFromVEX3of3(insn->vexXopPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexXopPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexXopPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexXopPrefix[1]) << 0);
}
- switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ switch (ppFromVEX3of3(insn->vexXopPrefix[2]))
{
default:
break;
@@ -469,7 +487,9 @@ static int readPrefixes(struct InternalInstruction* insn) {
break;
}
- dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
+ insn->vexXopPrefix[0], insn->vexXopPrefix[1],
+ insn->vexXopPrefix[2]);
}
}
else if (byte == 0xc5) {
@@ -481,22 +501,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
- insn->vexSize = 2;
+ insn->vexXopType = TYPE_VEX_2B;
}
else {
unconsumeByte(insn);
}
- if (insn->vexSize == 2) {
- insn->vexPrefix[0] = byte;
- consumeByte(insn, &insn->vexPrefix[1]);
+ if (insn->vexXopType == TYPE_VEX_2B) {
+ insn->vexXopPrefix[0] = byte;
+ consumeByte(insn, &insn->vexXopPrefix[1]);
if (insn->mode == MODE_64BIT) {
insn->rexPrefix = 0x40
- | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+ | (rFromVEX2of2(insn->vexXopPrefix[1]) << 2);
}
- switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ switch (ppFromVEX2of2(insn->vexXopPrefix[1]))
{
default:
break;
@@ -505,7 +525,53 @@ static int readPrefixes(struct InternalInstruction* insn) {
break;
}
- dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexXopPrefix[0], insn->vexXopPrefix[1]);
+ }
+ }
+ else if (byte == 0x8f) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of XOP");
+ return -1;
+ }
+
+ if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
+ insn->vexXopType = TYPE_XOP;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexXopType == TYPE_XOP) {
+ insn->vexXopPrefix[0] = byte;
+ consumeByte(insn, &insn->vexXopPrefix[1]);
+ consumeByte(insn, &insn->vexXopPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromXOP3of3(insn->vexXopPrefix[2]) << 3)
+ | (rFromXOP2of3(insn->vexXopPrefix[1]) << 2)
+ | (xFromXOP2of3(insn->vexXopPrefix[1]) << 1)
+ | (bFromXOP2of3(insn->vexXopPrefix[1]) << 0);
+ }
+
+ switch (ppFromXOP3of3(insn->vexXopPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
+ insn->vexXopPrefix[0], insn->vexXopPrefix[1],
+ insn->vexXopPrefix[2]);
}
}
else {
@@ -580,37 +646,49 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = ONEBYTE;
- if (insn->vexSize == 3)
+ if (insn->vexXopType == TYPE_VEX_3B)
{
- switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ switch (mmmmmFromVEX2of3(insn->vexXopPrefix[1]))
{
default:
- dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
+ mmmmmFromVEX2of3(insn->vexXopPrefix[1]));
return -1;
- case 0:
- break;
case VEX_LOB_0F:
- insn->twoByteEscape = 0x0f;
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
case VEX_LOB_0F38:
- insn->twoByteEscape = 0x0f;
- insn->threeByteEscape = 0x38;
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
case VEX_LOB_0F3A:
- insn->twoByteEscape = 0x0f;
- insn->threeByteEscape = 0x3a;
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
}
}
- else if (insn->vexSize == 2)
+ else if (insn->vexXopType == TYPE_VEX_2B)
{
- insn->twoByteEscape = 0x0f;
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
}
+ else if (insn->vexXopType == TYPE_XOP)
+ {
+ switch (mmmmmFromXOP2of3(insn->vexXopPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
+ mmmmmFromVEX2of3(insn->vexXopPrefix[1]));
+ return -1;
+ case XOP_MAP_SELECT_8:
+ insn->opcodeType = XOP8_MAP;
+ return consumeByte(insn, &insn->opcode);
+ case XOP_MAP_SELECT_9:
+ insn->opcodeType = XOP9_MAP;
+ return consumeByte(insn, &insn->opcode);
+ case XOP_MAP_SELECT_A:
+ insn->opcodeType = XOPA_MAP;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
if (consumeByte(insn, &current))
return -1;
@@ -618,16 +696,12 @@ static int readOpcode(struct InternalInstruction* insn) {
if (current == 0x0f) {
dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
- insn->twoByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
if (current == 0x38) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
- insn->threeByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
@@ -635,8 +709,6 @@ static int readOpcode(struct InternalInstruction* insn) {
} else if (current == 0x3a) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
- insn->threeByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
@@ -644,8 +716,6 @@ static int readOpcode(struct InternalInstruction* insn) {
} else if (current == 0xa6) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
- insn->threeByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
@@ -653,8 +723,6 @@ static int readOpcode(struct InternalInstruction* insn) {
} else if (current == 0xa7) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
- insn->threeByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
@@ -768,11 +836,27 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
- if (insn->vexSize) {
+ if (insn->vexXopType != TYPE_NO_VEX_XOP) {
attrMask |= ATTR_VEX;
- if (insn->vexSize == 3) {
- switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ if (insn->vexXopType == TYPE_VEX_3B) {
+ switch (ppFromVEX3of3(insn->vexXopPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX3of3(insn->vexXopPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexXopType == TYPE_VEX_2B) {
+ switch (ppFromVEX2of2(insn->vexXopPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
@@ -784,11 +868,11 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
break;
}
- if (lFromVEX3of3(insn->vexPrefix[2]))
+ if (lFromVEX2of2(insn->vexXopPrefix[1]))
attrMask |= ATTR_VEXL;
}
- else if (insn->vexSize == 2) {
- switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ else if (insn->vexXopType == TYPE_XOP) {
+ switch (ppFromXOP3of3(insn->vexXopPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
@@ -800,7 +884,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
break;
}
- if (lFromVEX2of2(insn->vexPrefix[1]))
+ if (lFromXOP3of3(insn->vexXopPrefix[2]))
attrMask |= ATTR_VEXL;
}
else {
@@ -826,42 +910,6 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
/* The following clauses compensate for limitations of the tables. */
- if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
- !(attrMask & ATTR_OPSIZE)) {
- /*
- * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
- * has precedence since there are no L-bit with W-bit entries in the tables.
- * So if the L-bit isn't significant we should use the W-bit instead.
- * We only need to do this if the instruction doesn't specify OpSize since
- * there is a VEX_L_W_OPSIZE table.
- */
-
- const struct InstructionSpecifier *spec;
- uint16_t instructionIDWithWBit;
- const struct InstructionSpecifier *specWithWBit;
-
- spec = specifierForUID(instructionID);
-
- if (getIDWithAttrMask(&instructionIDWithWBit,
- insn,
- (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
- insn->instructionID = instructionID;
- insn->spec = spec;
- return 0;
- }
-
- specWithWBit = specifierForUID(instructionIDWithWBit);
-
- if (instructionID != instructionIDWithWBit) {
- insn->instructionID = instructionIDWithWBit;
- insn->spec = specWithWBit;
- } else {
- insn->instructionID = instructionID;
- insn->spec = spec;
- }
- return 0;
- }
-
if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
/*
* The instruction tables make no distinction between instructions that
@@ -1502,10 +1550,12 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
static int readVVVV(struct InternalInstruction* insn) {
dbgprintf(insn, "readVVVV()");
- if (insn->vexSize == 3)
- insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
- else if (insn->vexSize == 2)
- insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ if (insn->vexXopType == TYPE_VEX_3B)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexXopPrefix[2]);
+ else if (insn->vexXopType == TYPE_VEX_2B)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexXopPrefix[1]);
+ else if (insn->vexXopType == TYPE_XOP)
+ insn->vvvv = vvvvFromXOP3of3(insn->vexXopPrefix[2]);
else
return -1;
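On the 0x8F handling added to readPrefixes(): 0x8F is also the opcode of POP r/m, which is encoded as /0, so a zero ModRM.reg field in the byte after 0x8F means POP rather than an XOP escape. That is what the `(byte1 & 0x38) != 0x0` test checks before committing to TYPE_XOP and consuming the two payload bytes. A small stand-alone sketch of the same disambiguation (plain C++, illustrative bytes, not the decoder's API):

    #include <cassert>
    #include <cstdint>

    // Returns true if a byte sequence starting with 0x8F should be decoded as an
    // XOP-prefixed instruction rather than POP r/m. POP r/m is /0, i.e. the
    // ModRM.reg field (bits 5:3 of the byte following 0x8F) is zero; any other
    // value in those bits selects one of the XOP opcode maps.
    static bool isXopEscape(uint8_t firstByte, uint8_t secondByte) {
      return firstByte == 0x8f && (secondByte & 0x38) != 0;
    }

    int main() {
      assert(!isXopEscape(0x8f, 0xc0)); // 8F C0: a pop r/m encoding (reg field is 0)
      assert(isXopEscape(0x8f, 0xe9));  // 8F E9 ...: low five bits select XOP map 9
      return 0;
    }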
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index dcb6aad..6d03d5c 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -59,6 +59,15 @@ extern "C" {
#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
#define ppFromVEX2of2(vex) ((vex) & 0x3)
+#define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7)
+#define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6)
+#define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5)
+#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
+#define wFromXOP3of3(xop) (((xop) & 0x80) >> 7)
+#define vvvvFromXOP3of3(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)
+#define ppFromXOP3of3(xop) ((xop) & 0x3)
+
/*
* These enums represent Intel registers for use by the decoder.
*/
@@ -447,6 +456,12 @@ typedef enum {
VEX_LOB_0F3A = 0x3
} VEXLeadingOpcodeByte;
+typedef enum {
+ XOP_MAP_SELECT_8 = 0x8,
+ XOP_MAP_SELECT_9 = 0x9,
+ XOP_MAP_SELECT_A = 0xA
+} XOPMapSelect;
+
/*
* VEXPrefixCode - Possible values for the VEX.pp field
*/
@@ -458,6 +473,13 @@ typedef enum {
VEX_PREFIX_F2 = 0x3
} VEXPrefixCode;
+typedef enum {
+ TYPE_NO_VEX_XOP = 0x0,
+ TYPE_VEX_2B = 0x1,
+ TYPE_VEX_3B = 0x2,
+ TYPE_XOP = 0x3
+} VEXXOPType;
+
typedef uint8_t BOOL;
/*
@@ -514,10 +536,10 @@ struct InternalInstruction {
uint8_t prefixPresent[0x100];
/* contains the location (for use with the reader) of the prefix byte */
uint64_t prefixLocations[0x100];
- /* The value of the VEX prefix, if present */
- uint8_t vexPrefix[3];
+ /* The value of the VEX/XOP prefix, if present */
+ uint8_t vexXopPrefix[3];
/* The length of the VEX prefix (0 if not present) */
- uint8_t vexSize;
+ VEXXOPType vexXopType;
/* The value of the REX prefix, if present */
uint8_t rexPrefix;
/* The location where a mandatory prefix would have to be (i.e., right before
@@ -541,10 +563,6 @@ struct InternalInstruction {
/* opcode state */
- /* The value of the two-byte escape prefix (usually 0x0f) */
- uint8_t twoByteEscape;
- /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
- uint8_t threeByteEscape;
/* The last byte of the opcode, not counting any ModR/M extension */
uint8_t opcode;
/* The ModR/M byte of the instruction, if it is an opcode extension */
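The new *FromXOP* macros above follow the 3-byte VEX layout: the second prefix byte carries R, X and B (stored inverted) plus the 5-bit map select, and the third carries W, vvvv (also inverted), L and pp. A minimal decoding sketch using a hypothetical struct, with prefix bytes chosen only to exercise the field extraction:

    #include <cassert>
    #include <cstdint>

    // Hypothetical container for the fields of a 3-byte XOP prefix: 8F <b1> <b2>.
    struct XopFields {
      uint8_t R, X, B;    // stored one's-complemented in b1, as in VEX
      uint8_t MapSelect;  // b1 bits 4:0 (0x8, 0x9 or 0xA)
      uint8_t W;          // b2 bit 7
      uint8_t VVVV;       // b2 bits 6:3, stored one's-complemented
      uint8_t L;          // b2 bit 2
      uint8_t PP;         // b2 bits 1:0
    };

    static XopFields decodeXopPrefix(uint8_t b1, uint8_t b2) {
      XopFields F;
      F.R         = (~b1 & 0x80) >> 7;
      F.X         = (~b1 & 0x40) >> 6;
      F.B         = (~b1 & 0x20) >> 5;
      F.MapSelect =   b1 & 0x1f;
      F.W         =  (b2 & 0x80) >> 7;
      F.VVVV      = (~b2 & 0x78) >> 3;
      F.L         =  (b2 & 0x04) >> 2;
      F.PP        =   b2 & 0x03;
      return F;
    }

    int main() {
      // b1 = 0xE9: inverted R/X/B bits all set (logical values 0), map select 9.
      // b2 = 0x78: W = 0, stored vvvv = 0b1111 (logical 0), L = 0, pp = 0.
      XopFields F = decodeXopPrefix(0xE9, 0x78);
      assert(F.R == 0 && F.X == 0 && F.B == 0 && F.MapSelect == 0x9);
      assert(F.W == 0 && F.VVVV == 0 && F.L == 0 && F.PP == 0);
      return 0;
    }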
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index d291441..dd1719c 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -32,6 +32,9 @@
#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
+#define XOP8_MAP_SYM x86DisassemblerXOP8Opcodes
+#define XOP9_MAP_SYM x86DisassemblerXOP9Opcodes
+#define XOPA_MAP_SYM x86DisassemblerXOPAOpcodes
#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR "x86DisassemblerContexts"
@@ -41,6 +44,9 @@
#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
+#define XOP8_MAP_STR "x86DisassemblerXOP8Opcodes"
+#define XOP9_MAP_STR "x86DisassemblerXOP9Opcodes"
+#define XOPA_MAP_STR "x86DisassemblerXOPAOpcodes"
/*
* Attributes of an instruction that must be known before the opcode can be
@@ -116,10 +122,10 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \
- ENUM_ENTRY(IC_VEX_L_W, 3, "requires VEX, L and W") \
- ENUM_ENTRY(IC_VEX_L_W_XS, 4, "requires VEX, L, W and XS prefix") \
- ENUM_ENTRY(IC_VEX_L_W_XD, 4, "requires VEX, L, W and XD prefix") \
- ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 4, "requires VEX, L, W and OpSize") \
+ ENUM_ENTRY(IC_VEX_L_W, 4, "requires VEX, L and W") \
+ ENUM_ENTRY(IC_VEX_L_W_XS, 5, "requires VEX, L, W and XS prefix") \
+ ENUM_ENTRY(IC_VEX_L_W_XD, 5, "requires VEX, L, W and XD prefix") \
+ ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") \
ENUM_ENTRY(IC_EVEX, 1, "requires an EVEX prefix") \
ENUM_ENTRY(IC_EVEX_XS, 2, "requires EVEX and the XS prefix") \
ENUM_ENTRY(IC_EVEX_XD, 2, "requires EVEX and the XD prefix") \
@@ -215,7 +221,55 @@ enum attributeBits {
ENUM_ENTRY(IC_EVEX_L2_W_K_B, 3, "requires EVEX_B, EVEX_K, L2 and W") \
ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XS prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XD prefix") \
- ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize")
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_KZ_B, 1, "requires EVEX_B and EVEX_KZ prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_KZ, 1, "requires an EVEX_KZ prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_KZ, 2, "requires EVEX_KZ and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_KZ, 2, "requires EVEX_KZ and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_KZ, 2, "requires EVEX_KZ and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_KZ, 3, "requires EVEX_KZ and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_KZ, 4, "requires EVEX_KZ, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_KZ, 4, "requires EVEX_KZ, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ, 4, "requires EVEX_KZ, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_KZ, 3, "requires EVEX_KZ and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_KZ, 4, "requires EVEX_KZ and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_KZ, 4, "requires EVEX_KZ and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ, 4, "requires EVEX_KZ, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_KZ, 3, "requires EVEX_KZ, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_KZ, 4, "requires EVEX_KZ, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_KZ, 4, "requires EVEX_KZ, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_KZ, 3, "requires EVEX_KZ and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_KZ, 4, "requires EVEX_KZ and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_KZ, 4, "requires EVEX_KZ and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
@@ -234,7 +288,10 @@ typedef enum {
THREEBYTE_38 = 2,
THREEBYTE_3A = 3,
THREEBYTE_A6 = 4,
- THREEBYTE_A7 = 5
+ THREEBYTE_A7 = 5,
+ XOP8_MAP = 6,
+ XOP9_MAP = 7,
+ XOPA_MAP = 8
} OpcodeType;
/*
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index b9d0082..4439311 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -215,3 +215,19 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
O << markup(">");
}
+
+void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &DispSpec = MI->getOperand(Op);
+
+ O << markup("<mem:");
+
+ if (DispSpec.isImm()) {
+ O << formatImm(DispSpec.getImm());
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement?");
+ O << *DispSpec.getExpr();
+ }
+
+ O << markup(">");
+}
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 8d05256..a8fab72 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -42,7 +42,8 @@ public:
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
-
+ void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
@@ -86,6 +87,19 @@ public:
void printf512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
+
+ void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemOffset(MI, OpNo, O);
+ }
};
}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 9dfc9a9..e7e7b15 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -200,3 +200,19 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
O << ']';
}
+
+void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &DispSpec = MI->getOperand(Op);
+
+ O << '[';
+
+ if (DispSpec.isImm()) {
+ O << formatImm(DispSpec.getImm());
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement?");
+ O << *DispSpec.getExpr();
+ }
+
+ O << ']';
+}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 45beeda..590bf68 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -39,7 +39,8 @@ public:
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O);
void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
+ void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "opaque ptr ";
printMemReference(MI, OpNo, O);
@@ -97,6 +98,23 @@ public:
O << "zmmword ptr ";
printMemReference(MI, OpNo, O);
}
+
+ void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "byte ptr ";
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "word ptr ";
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "dword ptr ";
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "qword ptr ";
+ printMemOffset(MI, OpNo, O);
+ }
};
}
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 598ddee..f8e359b 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -19,10 +20,10 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -67,9 +68,16 @@ public:
class X86AsmBackend : public MCAsmBackend {
StringRef CPU;
+ bool HasNopl;
public:
X86AsmBackend(const Target &T, StringRef _CPU)
- : MCAsmBackend(), CPU(_CPU) {}
+ : MCAsmBackend(), CPU(_CPU) {
+ HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
+ CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
+ CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
+ CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
+ CPU != "c3" && CPU != "c3-2";
+ }
unsigned getNumFixupKinds() const {
return X86::NumTargetFixupKinds;
@@ -308,8 +316,8 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// This CPU doesnt support long nops. If needed add more.
// FIXME: Can we get this from the subtarget somehow?
- if (CPU == "generic" || CPU == "i386" || CPU == "i486" || CPU == "i586" ||
- CPU == "pentium" || CPU == "pentium-mmx" || CPU == "geode") {
+ // FIXME: We could generate something better than plain 0x90.
+ if (!HasNopl) {
for (uint64_t i = 0; i < Count; ++i)
OW->Write8(0x90);
return true;
@@ -334,6 +342,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
/* *** */
namespace {
+
class ELFX86AsmBackend : public X86AsmBackend {
public:
uint8_t OSABI;
@@ -382,35 +391,368 @@ public:
}
};
+namespace CU {
+
+ /// Compact unwind encoding values.
+ enum CompactUnwindEncodings {
+ /// [RE]BP-based frame where [RE]BP is pushed on the stack immediately after
+ /// the return address, then [RE]SP is moved to [RE]BP.
+ UNWIND_MODE_BP_FRAME = 0x01000000,
+
+ /// A frameless function with a small constant stack size.
+ UNWIND_MODE_STACK_IMMD = 0x02000000,
+
+ /// A frameless function with a large constant stack size.
+ UNWIND_MODE_STACK_IND = 0x03000000,
+
+ /// No compact unwind encoding is available.
+ UNWIND_MODE_DWARF = 0x04000000,
+
+ /// Mask for encoding the frame registers.
+ UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
+
+ /// Mask for encoding the frameless registers.
+ UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
+ };
+
+} // end CU namespace
+
class DarwinX86AsmBackend : public X86AsmBackend {
+ const MCRegisterInfo &MRI;
+
+ /// \brief Number of registers that can be saved in a compact unwind encoding.
+ enum { CU_NUM_SAVED_REGS = 6 };
+
+ mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
+ bool Is64Bit;
+
+ unsigned OffsetSize; ///< Offset of a "push" instruction.
+ unsigned PushInstrSize; ///< Size of a "push" instruction.
+ unsigned MoveInstrSize; ///< Size of a "move" instruction.
+ unsigned StackDivide; ///< Amount to adjust stack size by.
+protected:
+ /// \brief Implementation of algorithm to generate the compact unwind encoding
+ /// for the CFI instructions.
+ uint32_t
+ generateCompactUnwindEncodingImpl(ArrayRef<MCCFIInstruction> Instrs) const {
+ if (Instrs.empty()) return 0;
+
+ // Reset the saved registers.
+ unsigned SavedRegIdx = 0;
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+
+ bool HasFP = false;
+
+ // Encode that we are using EBP/RBP as the frame pointer.
+ uint32_t CompactUnwindEncoding = 0;
+
+ unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
+ unsigned InstrOffset = 0;
+ unsigned StackAdjust = 0;
+ unsigned StackSize = 0;
+ unsigned PrevStackSize = 0;
+ unsigned NumDefCFAOffsets = 0;
+
+ for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
+ const MCCFIInstruction &Inst = Instrs[i];
+
+ switch (Inst.getOperation()) {
+ default:
+ // Any other CFI directives indicate a frame that we aren't prepared
+ // to represent via compact unwind, so just bail out.
+ return 0;
+ case MCCFIInstruction::OpDefCfaRegister: {
+ // Defines a frame pointer. E.g.
+ //
+ // movq %rsp, %rbp
+ // L0:
+ // .cfi_def_cfa_register %rbp
+ //
+ HasFP = true;
+ assert(MRI.getLLVMRegNum(Inst.getRegister(), true) ==
+ (Is64Bit ? X86::RBP : X86::EBP) && "Invalid frame pointer!");
+
+ // Reset the counts.
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ StackAdjust = 0;
+ SavedRegIdx = 0;
+ InstrOffset += MoveInstrSize;
+ break;
+ }
+ case MCCFIInstruction::OpDefCfaOffset: {
+ // Defines a new offset for the CFA. E.g.
+ //
+ // With frame:
+ //
+ // pushq %rbp
+ // L0:
+ // .cfi_def_cfa_offset 16
+ //
+ // Without frame:
+ //
+ // subq $72, %rsp
+ // L0:
+ // .cfi_def_cfa_offset 80
+ //
+ PrevStackSize = StackSize;
+ StackSize = std::abs(Inst.getOffset()) / StackDivide;
+ ++NumDefCFAOffsets;
+ break;
+ }
+ case MCCFIInstruction::OpOffset: {
+ // Defines a "push" of a callee-saved register. E.g.
+ //
+ // pushq %r15
+ // pushq %r14
+ // pushq %rbx
+ // L0:
+ // subq $120, %rsp
+ // L1:
+ // .cfi_offset %rbx, -40
+ // .cfi_offset %r14, -32
+ // .cfi_offset %r15, -24
+ //
+ if (SavedRegIdx == CU_NUM_SAVED_REGS)
+ // If there are too many saved registers, we cannot use a compact
+ // unwind encoding.
+ return CU::UNWIND_MODE_DWARF;
+
+ unsigned Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ SavedRegs[SavedRegIdx++] = Reg;
+ StackAdjust += OffsetSize;
+ InstrOffset += PushInstrSize;
+ break;
+ }
+ }
+ }
+
+ StackAdjust /= StackDivide;
+
+ if (HasFP) {
+ if ((StackAdjust & 0xFF) != StackAdjust)
+ // Offset was too big for a compact unwind encoding.
+ return CU::UNWIND_MODE_DWARF;
+
+ // Get the encoding of the saved registers when we have a frame pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
+ if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
+
+ CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
+ CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
+ CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
+ } else {
+ // If the amount of the stack allocation is the size of a register, then
+ // we "push" the RAX/EAX register onto the stack instead of adjusting the
+ // stack pointer with a SUB instruction. We don't support the push of the
+ // RAX/EAX register with compact unwind. So we check for that situation
+ // here.
+ if ((NumDefCFAOffsets == SavedRegIdx + 1 &&
+ StackSize - PrevStackSize == 1) ||
+ (Instrs.size() == 1 && NumDefCFAOffsets == 1 && StackSize == 2))
+ return CU::UNWIND_MODE_DWARF;
+
+ SubtractInstrIdx += InstrOffset;
+ ++StackAdjust;
+
+ if ((StackSize & 0xFF) == StackSize) {
+ // Frameless stack with a small stack size.
+ CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
+
+ // Encode the stack size.
+ CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
+ } else {
+ if ((StackAdjust & 0x7) != StackAdjust)
+ // The extra stack adjustments are too big for us to handle.
+ return CU::UNWIND_MODE_DWARF;
+
+ // Frameless stack with an offset too large for us to encode compactly.
+ CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
+
+ // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
+ // instruction.
+ CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
+
+ // Encode any extra stack adjustments (done via push
+ // instructions).
+ CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
+ }
+
+ // Encode the number of registers saved. (Reverse the list first.)
+ std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
+ CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
+ // Get the encoding of the saved registers when we don't have a frame
+ // pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
+ if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
+
+ // Encode the register encoding.
+ CompactUnwindEncoding |=
+ RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
+ }
+
+ return CompactUnwindEncoding;
+ }
+
+private:
+ /// \brief Get the compact unwind number for a given register. The number
+ /// corresponds to the enum lists in compact_unwind_encoding.h.
+ int getCompactUnwindRegNum(unsigned Reg) const {
+ static const uint16_t CU32BitRegs[7] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const uint16_t CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+ }
+
+ /// \brief Return the registers encoded for a compact encoding with a frame
+ /// pointer.
+ uint32_t encodeCompactUnwindRegistersWithFrame() const {
+ // Encode the registers in the order they were saved --- 3-bits per
+ // register. The list of saved registers is assumed to be in reverse
+ // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
+ uint32_t RegEnc = 0;
+ for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ unsigned Reg = SavedRegs[i];
+ if (Reg == 0) break;
+
+ int CURegNum = getCompactUnwindRegNum(Reg);
+ if (CURegNum == -1) return ~0U;
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for
+ // each register.
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
+ }
+
+ assert((RegEnc & 0x3FFFF) == RegEnc &&
+ "Invalid compact register encoding!");
+ return RegEnc;
+ }
+
+ /// \brief Create the permutation encoding used with frameless stacks. It is
+ /// passed the number of registers to be saved and an array of the registers
+ /// saved.
+ uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
+ // The saved registers are numbered from 1 to 6. In order to encode the
+ // order in which they were saved, we re-number them according to their
+ // place in the register order. The re-numbering is relative to the last
+ // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
+ // that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
+
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
+ unsigned Countless = 0;
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+ }
+
public:
- DarwinX86AsmBackend(const Target &T, StringRef CPU)
- : X86AsmBackend(T, CPU) { }
+ DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef CPU,
+ bool Is64Bit)
+ : X86AsmBackend(T, CPU), MRI(MRI), Is64Bit(Is64Bit) {
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ OffsetSize = Is64Bit ? 8 : 4;
+ MoveInstrSize = Is64Bit ? 3 : 2;
+ StackDivide = Is64Bit ? 8 : 4;
+ PushInstrSize = 1;
+ }
};
class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
+ bool SupportsCU;
public:
- DarwinX86_32AsmBackend(const Target &T, StringRef CPU)
- : DarwinX86AsmBackend(T, CPU) {}
+ DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef CPU, bool SupportsCU)
+ : DarwinX86AsmBackend(T, MRI, CPU, false), SupportsCU(SupportsCU) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_i386,
- object::mach::CSX86_ALL);
+ MachO::CPU_TYPE_I386,
+ MachO::CPU_SUBTYPE_I386_ALL);
+ }
+
+ /// \brief Generate the compact unwind encoding for the CFI instructions.
+ virtual uint32_t
+ generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const {
+ return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
}
};
class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
+ bool SupportsCU;
+ const MachO::CPUSubTypeX86 Subtype;
public:
- DarwinX86_64AsmBackend(const Target &T, StringRef CPU)
- : DarwinX86AsmBackend(T, CPU) {
+ DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef CPU, bool SupportsCU,
+ MachO::CPUSubTypeX86 st)
+ : DarwinX86AsmBackend(T, MRI, CPU, true), SupportsCU(SupportsCU),
+ Subtype(st) {
HasReliableSymbolDifference = true;
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
- object::mach::CTM_x86_64,
- object::mach::CSX86_ALL);
+ MachO::CPU_TYPE_X86_64, Subtype);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -445,15 +787,26 @@ public:
return false;
}
}
+
+ /// \brief Generate the compact unwind encoding for the CFI instructions.
+ virtual uint32_t
+ generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const {
+ return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
+ }
};
} // end anonymous namespace
-MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
+ StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
- return new DarwinX86_32AsmBackend(T, CPU);
+ return new DarwinX86_32AsmBackend(T, MRI, CPU,
+ TheTriple.isMacOSX() &&
+ !TheTriple.isMacOSXVersionLT(10, 7));
if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
return new WindowsX86AsmBackend(T, false, CPU);
@@ -462,11 +815,21 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, String
return new ELFX86_32AsmBackend(T, OSABI, CPU);
}
-MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
+ StringRef CPU) {
Triple TheTriple(TT);
- if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
- return new DarwinX86_64AsmBackend(T, CPU);
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ MachO::CPUSubTypeX86 CS =
+ StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
+ .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H)
+ .Default(MachO::CPU_SUBTYPE_X86_64_ALL);
+ return new DarwinX86_64AsmBackend(T, MRI, CPU,
+ TheTriple.isMacOSX() &&
+ !TheTriple.isMacOSXVersionLT(10, 7), CS);
+ }
if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
return new WindowsX86AsmBackend(T, true, CPU);
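For the frameless compact-unwind case added above, the saved-register order is packed into 10 bits by encodeCompactUnwindRegistersWithoutFrame(): each register is renumbered relative to the smaller registers pushed before it, and the results are combined in a mixed-radix (factorial-style) sum whose weights depend on how many registers were pushed. A self-contained sketch, assuming the input is already the list of compact-unwind register numbers (1..6) in push order and omitting the 6-slot SavedRegs bookkeeping and reversal the backend performs:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Encode the order of up to six saved registers as a 10-bit permutation
    // index, following the scheme in encodeCompactUnwindRegistersWithoutFrame().
    static uint32_t encodePermutation(const std::vector<unsigned> &regs) {
      const size_t n = regs.size();
      assert(n >= 1 && n <= 6);

      // Renumber each register relative to the smaller registers pushed before it.
      std::vector<unsigned> renum(n);
      for (size_t i = 0; i != n; ++i) {
        unsigned countless = 0;
        for (size_t j = 0; j != i; ++j)
          if (regs[j] < regs[i])
            ++countless;
        renum[i] = regs[i] - countless - 1;
      }

      switch (n) {
      case 6:
      case 5:  return 120 * renum[0] + 24 * renum[1] + 6 * renum[2] +
                        2 * renum[3] + renum[4];
      case 4:  return 60 * renum[0] + 12 * renum[1] + 3 * renum[2] + renum[3];
      case 3:  return 20 * renum[0] + 4 * renum[1] + renum[2];
      case 2:  return 5 * renum[0] + renum[1];
      default: return renum[0]; // n == 1
      }
    }

    int main() {
      // The {6, 2, 4, 5} ordering from the comment in the patch encodes to 320:
      // the renumbered values are {5, 1, 2, 2} and 60*5 + 12*1 + 3*2 + 2 == 320.
      assert(encodePermutation({6, 2, 4, 5}) == 320);
      assert(encodePermutation({1}) == 0); // a single saved register needs no ordering bits
      return 0;
    }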
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 25d1af3..1ef9814 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -354,6 +354,9 @@ namespace X86II {
// XOP9 - Prefix to exclude use of imm byte.
XOP9 = 21 << Op0Shift,
+ // XOPA - Prefix to encode 0xA in VEX.MMMM of XOP instructions.
+ XOPA = 22 << Op0Shift,
+
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
// They are used to specify GPRs and SSE registers, 64-bit operand size,
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index b400b87..3ddd865 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -108,6 +108,15 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
case MCSymbolRefExpr::VK_None:
Type = ELF::R_X86_64_64;
break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_X86_64_GOT64;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_X86_64_GOTOFF64;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_X86_64_TPOFF64;
+ break;
case MCSymbolRefExpr::VK_DTPOFF:
Type = ELF::R_X86_64_DTPOFF64;
break;
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 8f4ab46..a3eb4fb 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -13,7 +13,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCRelocationInfo.h"
-#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 7815ae9..3861e1c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -59,10 +59,8 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
// for .S files on other systems. Perhaps this is because the file system
// wasn't always case preserving or something.
CommentString = "##";
- PCSymbol = ".";
SupportsDebugInformation = true;
- DwarfUsesInlineInfoSection = true;
UseDataRegionDirectives = MarkedJTDataRegions;
// Exceptions handling
@@ -92,8 +90,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
TextAlignFillValue = 0x90;
PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
- PCSymbol = ".";
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
@@ -139,6 +135,8 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
+
+ AllowAtInName = true;
}
void X86MCAsmInfoGNUCOFF::anchor() { }
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index b6b70fd..80979dd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -17,6 +17,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class Triple;
@@ -35,7 +36,7 @@ namespace llvm {
MCStreamer &Streamer) const;
};
- class X86ELFMCAsmInfo : public MCAsmInfo {
+ class X86ELFMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit X86ELFMCAsmInfo(const Triple &Triple);
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 8515879..7952607 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -564,7 +564,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned char VEX_W = 0;
// XOP: Use XOP prefix byte 0x8f instead of VEX.
- unsigned char XOP = 0;
+ bool XOP = false;
// VEX_5M (VEX m-mmmmm field):
//
@@ -574,7 +574,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
// 0b01000: XOP map select - 08h instructions with imm byte
- // 0b10001: XOP map select - 09h instructions with no imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
+ // 0b01010: XOP map select - 0Ah instructions with imm dword
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -620,7 +621,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_W = 1;
if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
- XOP = 1;
+ XOP = true;
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
@@ -665,11 +666,11 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::XOP9:
VEX_5M = 0x9;
break;
- case X86II::A6: // Bypass: Not used by VEX
- case X86II::A7: // Bypass: Not used by VEX
- case X86II::TB: // Bypass: Not used by VEX
- case 0:
- break; // No prefix!
+ case X86II::XOPA:
+ VEX_5M = 0xA;
+ break;
+ case X86II::TB: // VEX_5M/VEX_PP already correct
+ break;
}
@@ -786,8 +787,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_4V = getVEXRegisterEncoding(MI, CurOp);
if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
EVEX_V2 = 0x0;
+ CurOp++;
}
- CurOp++;
if (HasEVEX_K)
EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++);
@@ -868,11 +869,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::MRM6r: case X86II::MRM7r:
// MRM0r-MRM7r instructions forms:
// dst(VEX_4V), src(ModR/M), imm8
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
- if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
- EVEX_V2 = 0x0;
- CurOp++;
-
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
+ EVEX_V2 = 0x0;
+ CurOp++;
+ }
if (HasEVEX_K)
EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++);
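The EmitVEXOpcodePrefix() change above adds VEX_5M = 0xA for the new XOPA map. As the emitter-side counterpart to the decoder macros earlier in this patch, here is a hedged sketch of how the two payload bytes of a 3-byte XOP prefix are assembled from those fields (the escape byte is 0x8F; R/X/B and vvvv are written inverted; the function and parameter names are illustrative, not the emitter's API). It round-trips with the decoding sketch given after the X86DisassemblerDecoder.h hunk:

    #include <cassert>
    #include <cstdint>

    // Build bytes 2 and 3 of an XOP prefix. mapSelect is the 5-bit m-mmmmm value
    // (0x8, 0x9 or 0xA); r, x, b, w, l are single bits; vvvv is the 4-bit extra
    // register specifier; pp selects the implied 66/F3/F2 prefix (0-3).
    static void buildXopPrefix(uint8_t r, uint8_t x, uint8_t b, uint8_t mapSelect,
                               uint8_t w, uint8_t vvvv, uint8_t l, uint8_t pp,
                               uint8_t &byte1, uint8_t &byte2) {
      byte1 = ((~r & 1) << 7) | ((~x & 1) << 6) | ((~b & 1) << 5) | (mapSelect & 0x1f);
      byte2 = ((w & 1) << 7) | ((~vvvv & 0xf) << 3) | ((l & 1) << 2) | (pp & 3);
    }

    int main() {
      uint8_t b1, b2;
      // No extended registers, map 9, W = 0, vvvv unused (all ones once
      // inverted), L = 0, pp = 0.
      buildXopPrefix(0, 0, 0, 0x9, 0, 0, 0, 0, b1, b2);
      assert(b1 == 0xE9 && b2 == 0x78);
      return 0;
    }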
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index bd23ce4..1cbdafd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -368,7 +368,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, 0, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 2f459b4..41ae435 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -79,8 +79,10 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU);
-MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index 75b5acf..209b1d0 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -17,7 +17,7 @@
using namespace llvm;
using namespace object;
-using namespace macho;
+using namespace MachO;
namespace {
class X86_64MachORelocationInfo : public MCRelocationInfo {
@@ -33,7 +33,7 @@ public:
StringRef SymName; SymI->getName(SymName);
uint64_t SymAddr; SymI->getAddress(SymAddr);
- RelocationEntry RE = Obj->getRelocation(Rel.getRawDataRefImpl());
+ any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl());
bool isPCRel = Obj->getAnyRelocationPCRel(RE);
MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
@@ -43,44 +43,44 @@ public:
const MCExpr *Expr = 0;
switch(RelType) {
- case RIT_X86_64_TLV:
+ case X86_64_RELOC_TLV:
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
break;
- case RIT_X86_64_Signed4:
+ case X86_64_RELOC_SIGNED_4:
Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
MCConstantExpr::Create(4, Ctx),
Ctx);
break;
- case RIT_X86_64_Signed2:
+ case X86_64_RELOC_SIGNED_2:
Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
MCConstantExpr::Create(2, Ctx),
Ctx);
break;
- case RIT_X86_64_Signed1:
+ case X86_64_RELOC_SIGNED_1:
Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
MCConstantExpr::Create(1, Ctx),
Ctx);
break;
- case RIT_X86_64_GOTLoad:
+ case X86_64_RELOC_GOT_LOAD:
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
break;
- case RIT_X86_64_GOT:
+ case X86_64_RELOC_GOT:
Expr = MCSymbolRefExpr::Create(Sym, isPCRel ?
MCSymbolRefExpr::VK_GOTPCREL :
MCSymbolRefExpr::VK_GOT,
Ctx);
break;
- case RIT_X86_64_Subtractor:
+ case X86_64_RELOC_SUBTRACTOR:
{
RelocationRef RelNext;
Obj->getRelocationNext(Rel.getRawDataRefImpl(), RelNext);
- RelocationEntry RENext = Obj->getRelocation(RelNext.getRawDataRefImpl());
+ any_relocation_info RENext = Obj->getRelocation(RelNext.getRawDataRefImpl());
// X86_64_SUBTRACTOR must be followed by a relocation of type
- // X86_64_RELOC_UNSIGNED .
+ // X86_64_RELOC_UNSIGNED.
// NOTE: Scattered relocations don't exist on x86_64.
unsigned RType = Obj->getAnyRelocationType(RENext);
- if (RType != RIT_X86_64_Unsigned)
+ if (RType != X86_64_RELOC_UNSIGNED)
report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
"X86_64_RELOC_SUBTRACTOR.");
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 64f005c..eb7c0b1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -16,12 +16,11 @@
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MachO.h"
using namespace llvm;
-using namespace llvm::object;
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
@@ -132,7 +131,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
- Type = macho::RIT_X86_64_Unsigned;
+ Type = MachO::X86_64_RELOC_UNSIGNED;
Index = 0;
// FIXME: I believe this is broken, I don't think the linker can understand
@@ -141,26 +140,31 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// is to use an absolute symbol (which we don't support yet).
if (IsPCRel) {
IsExtern = 1;
- Type = macho::RIT_X86_64_Branch;
+ Type = MachO::X86_64_RELOC_BRANCH;
}
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
+ if (A->isTemporary())
+ A = &A->AliasedSymbol();
MCSymbolData &A_SD = Asm.getSymbolData(*A);
const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
const MCSymbol *B = &Target.getSymB()->getSymbol();
+ if (B->isTemporary())
+ B = &B->AliasedSymbol();
MCSymbolData &B_SD = Asm.getSymbolData(*B);
const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
// Neither symbol can be modified.
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported relocation of modified symbol");
+ report_fatal_error("unsupported relocation of modified symbol", false);
// We don't support PCrel relocations of differences. Darwin 'as' doesn't
// implement most of these correctly.
if (IsPCRel)
- report_fatal_error("unsupported pc-relative relocation of difference");
+ report_fatal_error("unsupported pc-relative relocation of difference",
+ false);
// The support for the situation where one or both of the symbols would
// require a local relocation is handled just like if the symbols were
@@ -173,7 +177,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// single SIGNED relocation); reject it for now. Except the case where both
// symbols don't have a base, equal but both NULL.
if (A_Base == B_Base && A_Base)
- report_fatal_error("unsupported relocation with identical base");
+ report_fatal_error("unsupported relocation with identical base", false);
+
+ // A subtraction expression where both symbols are undefined is a
+ // non-relocatable expression.
+ if (A->isUndefined() && B->isUndefined())
+ report_fatal_error("unsupported relocation with subtraction expression",
+ false);
Value += Writer->getSymbolAddress(&A_SD, Layout) -
(A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
@@ -188,15 +198,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
- Type = macho::RIT_X86_64_Unsigned;
-
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ Type = MachO::X86_64_RELOC_UNSIGNED;
+
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
if (B_Base) {
@@ -207,7 +217,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
- Type = macho::RIT_X86_64_Subtractor;
+ Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
MCSymbolData &SD = Asm.getSymbolData(*Symbol);
@@ -252,11 +262,11 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
return;
} else {
report_fatal_error("unsupported relocation of variable '" +
- Symbol->getName() + "'");
+ Symbol->getName() + "'", false);
}
} else {
report_fatal_error("unsupported relocation of undefined symbol '" +
- Symbol->getName() + "'");
+ Symbol->getName() + "'", false);
}
MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
@@ -267,15 +277,16 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// rewrite the movq to an leaq at link time if the symbol ends up in
// the same linkage unit.
if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
- Type = macho::RIT_X86_64_GOTLoad;
+ Type = MachO::X86_64_RELOC_GOT_LOAD;
else
- Type = macho::RIT_X86_64_GOT;
+ Type = MachO::X86_64_RELOC_GOT;
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- Type = macho::RIT_X86_64_TLV;
+ Type = MachO::X86_64_RELOC_TLV;
} else if (Modifier != MCSymbolRefExpr::VK_None) {
- report_fatal_error("unsupported symbol modifier in relocation");
+ report_fatal_error("unsupported symbol modifier in relocation",
+ false);
} else {
- Type = macho::RIT_X86_64_Signed;
+ Type = MachO::X86_64_RELOC_SIGNED;
// The Darwin x86_64 relocation format has a problem where it cannot
// encode an address (L<foo> + <constant>) which is outside the atom
@@ -292,34 +303,40 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// (the additional bias), but instead appear to just look at the final
// offset.
switch (-(Target.getConstant() + (1LL << Log2Size))) {
- case 1: Type = macho::RIT_X86_64_Signed1; break;
- case 2: Type = macho::RIT_X86_64_Signed2; break;
- case 4: Type = macho::RIT_X86_64_Signed4; break;
+ case 1: Type = MachO::X86_64_RELOC_SIGNED_1; break;
+ case 2: Type = MachO::X86_64_RELOC_SIGNED_2; break;
+ case 4: Type = MachO::X86_64_RELOC_SIGNED_4; break;
}
}
} else {
if (Modifier != MCSymbolRefExpr::VK_None)
report_fatal_error("unsupported symbol modifier in branch "
- "relocation");
+ "relocation", false);
- Type = macho::RIT_X86_64_Branch;
+ Type = MachO::X86_64_RELOC_BRANCH;
}
} else {
if (Modifier == MCSymbolRefExpr::VK_GOT) {
- Type = macho::RIT_X86_64_GOT;
+ Type = MachO::X86_64_RELOC_GOT;
} else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
// GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
// case all we do is set the PCrel bit in the relocation entry; this is
// used with exception handling, for example. The source is required to
// include any necessary offset directly.
- Type = macho::RIT_X86_64_GOT;
+ Type = MachO::X86_64_RELOC_GOT;
IsPCRel = 1;
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- report_fatal_error("TLVP symbol modifier should have been rip-rel");
+ report_fatal_error("TLVP symbol modifier should have been rip-rel",
+ false);
} else if (Modifier != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported symbol modifier in relocation");
- else
- Type = macho::RIT_X86_64_Unsigned;
+ report_fatal_error("unsupported symbol modifier in relocation", false);
+ else {
+ Type = MachO::X86_64_RELOC_UNSIGNED;
+ unsigned Kind = Fixup.getKind();
+ if (Kind == X86::reloc_signed_4byte)
+ report_fatal_error("32-bit absolute addressing is not supported in "
+ "64-bit mode", false);
+ }
}
}
@@ -327,13 +344,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
FixedValue = Value;
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}
@@ -347,7 +364,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_Vanilla;
+ unsigned Type = MachO::GENERIC_RELOC_VANILLA;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -355,7 +372,8 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
if (!A_SD->getFragment())
report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
+ "' can not be undefined in a subtraction expression",
+ false);
uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
@@ -367,22 +385,23 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
if (!B_SD->getFragment())
report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression");
+ "' can not be undefined in a subtraction expression",
+ false);
// Select the appropriate difference relocation type.
//
// Note that there is no longer any semantic difference between these two
// relocation types from the linkers point of view, this is done solely for
// pedantic compatibility with 'as'.
- Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
- (unsigned)macho::RIT_Generic_LocalDifference;
+ Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF :
+ (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
Value2 = Writer->getSymbolAddress(B_SD, Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
// Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == macho::RIT_Difference ||
- Type == macho::RIT_Generic_LocalDifference) {
+ if (Type == MachO::GENERIC_RELOC_SECTDIFF ||
+ Type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) {
// If the offset is too large to fit in a scattered relocation,
// we're hosed. It's an unfortunate limitation of the MachO format.
if (FixupOffset > 0xffffff) {
@@ -396,13 +415,13 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
llvm_unreachable("fatal error returned?!");
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((0 << 0) |
- (macho::RIT_Pair << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((0 << 0) | // r_address
+ (MachO::GENERIC_RELOC_PAIR << 24) | // r_type
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
@@ -416,13 +435,13 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
return false;
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
return true;
}
@@ -464,13 +483,13 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
}
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = Value;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (1 << 27) | // Extern
- (macho::RIT_Generic_TLV << 28)); // Type
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = Value;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // r_extern
+ (MachO::GENERIC_RELOC_TLV << 28)); // r_type
Writer->addRelocation(Fragment->getParent(), MRE);
}
@@ -530,7 +549,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
//
// FIXME: Currently, these are never generated (see code below). I cannot
// find a case where they are actually emitted.
- Type = macho::RIT_Vanilla;
+ Type = MachO::GENERIC_RELOC_VANILLA;
} else {
// Resolve constant variables.
if (SD->getSymbol().isVariable()) {
@@ -561,17 +580,17 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
if (IsPCRel)
FixedValue -= Writer->getSectionAddress(Fragment->getParent());
- Type = macho::RIT_Vanilla;
+ Type = MachO::GENERIC_RELOC_VANILLA;
}
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}
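The r_word1 packing repeated throughout this file mirrors struct relocation_info from <mach-o/reloc.h>: 24 bits of symbol or section index, one pcrel bit, two length bits (log2 of the fixup size), one extern bit, and four type bits, while r_word0 simply carries the fixup offset. A minimal self-contained sketch of that packing; the helper name is made up, but the layout is exactly the shift pattern used above.

#include <cstdint>

// Pack a non-scattered Mach-O relocation word, mirroring the
// (Index | IsPCRel<<24 | Log2Size<<25 | IsExtern<<27 | Type<<28) pattern above.
inline uint32_t packRelocWord1(uint32_t SymbolNum, bool PCRel,
                               unsigned Log2Size, bool Extern, unsigned Type) {
  return (SymbolNum & 0xFFFFFF) |
         (uint32_t(PCRel)  << 24) |
         ((Log2Size & 0x3) << 25) |
         (uint32_t(Extern) << 27) |
         ((Type & 0xF)     << 28);
}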
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index ed64a32..6da4142 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -27,7 +27,7 @@ namespace {
public:
X86WinCOFFObjectWriter(bool Is64Bit_);
- ~X86WinCOFFObjectWriter();
+ virtual ~X86WinCOFFObjectWriter();
virtual unsigned getRelocType(const MCValue &Target,
const MCFixup &Fixup,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 461ea9b..65c5552 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -50,10 +50,10 @@ def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
"Enable SSSE3 instructions",
[FeatureSSE3]>;
-def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
"Enable SSE 4.1 instructions",
[FeatureSSSE3]>;
-def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
@@ -68,7 +68,7 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureCMOV]>;
-def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true",
+def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
@@ -86,16 +86,16 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
-def FeatureAVX512 : SubtargetFeature<"avx-512", "X86SSELevel", "AVX512",
+def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
"Enable AVX-512 instructions",
[FeatureAVX2]>;
-def FeatureERI : SubtargetFeature<"avx-512-eri", "HasERI", "true",
+def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
-def FeatureCDI : SubtargetFeature<"avx-512-cdi", "HasCDI", "true",
+def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
-def FeaturePFI : SubtargetFeature<"avx-512-pfi", "HasPFI", "true",
+def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
@@ -117,12 +117,15 @@ def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
+def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
+ "Enable TBM instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
-def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
+def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
- "Support 16-bit floating point conversion instructions">;
+ "Support 16-bit floating point conversion instructions",
+ [FeatureAVX]>;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
@@ -137,6 +140,9 @@ def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
"Support HLE">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
+def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
+ "Enable SHA instructions",
+ [FeatureSSE2]>;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
@@ -163,6 +169,8 @@ include "X86Schedule.td"
def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
"Intel Atom processors">;
+def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
+ "Intel Silvermont processors">;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
@@ -206,6 +214,14 @@ def : ProcessorModel<"atom", AtomModel,
FeatureLEAUsesAG,
FeaturePadShortFunctions]>;
+// Atom Silvermont.
+def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
+ FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeaturePOPCNT,
+ FeaturePCLMUL, FeatureAES,
+ FeatureCallRegIndirect,
+ FeaturePRFCHW,
+ FeatureSlowBTMem]>;
// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
[FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
@@ -276,21 +292,30 @@ def : Proc<"amdfam10", [FeatureSSE4A,
FeaturePOPCNT, FeatureSlowBTMem]>;
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
- FeatureLZCNT, FeaturePOPCNT]>;
+ FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT]>;
// Jaguar
def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
- FeatureAES, FeaturePCLMUL, FeatureBMI,
- FeatureF16C, FeatureMOVBE, FeatureLZCNT,
- FeaturePOPCNT]>;
+ FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
+ FeatureBMI, FeatureF16C, FeatureMOVBE,
+ FeatureLZCNT, FeaturePOPCNT]>;
// Bulldozer
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePCLMUL,
+ FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureLZCNT, FeaturePOPCNT]>;
// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePCLMUL,
+ FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
+ FeaturePOPCNT, FeatureBMI, FeatureTBM,
+ FeatureFMA]>;
+
+// Steamroller
+def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
+ FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
+ FeatureF16C, FeatureLZCNT,
+ FeaturePOPCNT, FeatureBMI, FeatureTBM,
+ FeatureFMA, FeatureFSGSBase]>;
+
def : Proc<"geode", [Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 9e0ab82..1258441 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -96,7 +96,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
else
- GVSym = Mang->getSymbol(GV);
+ GVSym = getSymbol(GV);
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
@@ -109,21 +109,21 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
}
// If the name begins with a dollar-sign, enclose it in parens. We do this
@@ -333,21 +333,21 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op,
const MachineOperand &IndexReg = MI->getOperand(Op+2);
const MachineOperand &DispSpec = MI->getOperand(Op+3);
const MachineOperand &SegReg = MI->getOperand(Op+4);
-
+
// If this has a segment register, print it.
if (SegReg.getReg()) {
printOperand(MI, Op+4, O, Modifier, AsmVariant);
O << ':';
}
-
+
O << '[';
-
+
bool NeedPlus = false;
if (BaseReg.getReg()) {
printOperand(MI, Op, O, Modifier, AsmVariant);
NeedPlus = true;
}
-
+
if (IndexReg.getReg()) {
if (NeedPlus) O << " + ";
if (ScaleVal != 1)
@@ -394,6 +394,7 @@ bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
Reg = getX86SubSuperRegister(Reg, MVT::i32);
break;
case 'q': // Print DImode register
+ // FIXME: gcc will actually print e instead of r for 32-bit.
Reg = getX86SubSuperRegister(Reg, MVT::i64);
break;
}
@@ -518,6 +519,27 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget->isTargetEnvMacho())
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+
+ if (Subtarget->isTargetCOFF()) {
+ // Emit an absolute @feat.00 symbol. This appears to be some kind of
+ // compiler features bitfield read by link.exe.
+ if (!Subtarget->is64Bit()) {
+ MCSymbol *S = MMI->getContext().GetOrCreateSymbol(StringRef("@feat.00"));
+ OutStreamer.BeginCOFFSymbolDef(S);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
+ OutStreamer.EndCOFFSymbolDef();
+ // According to the PE-COFF spec, the LSB of this value marks the object
+ // for "registered SEH". This means that all SEH handler entry points
+ // must be registered in .sxdata. Use of any unregistered handlers will
+ // cause the process to terminate immediately. LLVM does not know how to
+ // register any SEH handlers, so its object files should be safe.
+ S->setAbsolute();
+ OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ OutStreamer.EmitAssignment(
+ S, MCConstantExpr::Create(int64_t(1), MMI->getContext()));
+ }
+ }
}
@@ -606,6 +628,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.AddBlankLine();
}
+ SM.serializeToStackMapSection();
+
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
@@ -645,12 +669,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->hasDLLExportLinkage())
- DLLExportedFns.push_back(Mang->getSymbol(I));
+ DLLExportedFns.push_back(getSymbol(I));
for (Module::const_global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I)
if (I->hasDLLExportLinkage())
- DLLExportedGlobals.push_back(Mang->getSymbol(I));
+ DLLExportedGlobals.push_back(getSymbol(I));
// Output linker support code for dllexported globals on windows.
if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
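For the @feat.00 emission above, the only behavior the patch relies on is that bit 0 of the symbol's value marks the object as using registered ("safe") SEH. A tiny sketch of that flag word with a made-up constant name; every other bit is deliberately left clear, matching the EmitAssignment of the constant 1 above.

#include <cstdint>

// Hypothetical helper mirroring the value assigned to @feat.00 above.
// Bit 0 = registered/safe SEH, per the PE-COFF note in the patch; no other
// feature bits are claimed here.
inline uint64_t featDotZeroZeroValue() {
  const uint64_t RegisteredSEH = 1u << 0; // assumed name, value from the patch
  return RegisteredSEH;
}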
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 6eed5ce..24a768b 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -24,9 +25,21 @@ class MCStreamer;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
+ StackMaps SM;
+
+ // Parses operands of PATCHPOINT and STACKMAP to produce stack map Location
+ // structures. Returns a result location and an iterator to the operand
+ // immediately following the operands consumed.
+ //
+ // This method is implemented in X86MCInstLower.cpp.
+ static std::pair<StackMaps::Location, MachineInstr::const_mop_iterator>
+ stackmapOperandParser(MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ const TargetMachine &TM);
+
public:
explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
+ : AsmPrinter(TM, Streamer), SM(*this, stackmapOperandParser) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
new file mode 100644
index 0000000..e76f9fd
--- /dev/null
+++ b/lib/Target/X86/X86CallingConv.h
@@ -0,0 +1,35 @@
+//=== X86CallingConv.h - X86 Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the X86 Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86CALLINGCONV_H
+#define X86CALLINGCONV_H
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+ llvm_unreachable("The AnyReg calling convention is only supported by the " \
+ "stackmap and patchpoint intrinsics.");
+  // Gracefully fall back to the X86 C calling convention on Release builds.
+ return false;
+}
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 38e2591..a78b5c0 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -151,6 +151,26 @@ def RetCC_X86_64_HiPE : CallingConv<[
CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
]>;
+// X86-64 WebKit_JS return-value convention.
+def RetCC_X86_64_WebKit_JS : CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: RAX
+ CCIfType<[i64], CCAssignToReg<[RAX]>>
+]>;
+
+// X86-64 AnyReg return-value convention. No explicit register is specified for
+// the return-value. The register allocator is allowed and expected to choose
+// any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fall back to the X86 C calling convention.
+def RetCC_X86_64_AnyReg : CallingConv<[
+ CCCustom<"CC_X86_AnyReg_Error">
+]>;
+
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
@@ -167,6 +187,10 @@ def RetCC_X86_64 : CallingConv<[
// HiPE uses RetCC_X86_64_HiPE
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
+ // Handle JavaScript calls.
+ CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo<RetCC_X86_64_WebKit_JS>>,
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_X86_64_AnyReg>>,
+
// Handle explicit CC selection
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
@@ -279,10 +303,10 @@ def CC_X86_Win64_C : CallingConv<[
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
[XMM0, XMM1, XMM2, XMM3]>>,
-
+
// Do not pass the sret argument in RCX, the Win64 thiscall calling
- // convention requires "this" to be passed in RCX.
- CCIfCC<"CallingConv::X86_ThisCall",
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
[XMM1, XMM2, XMM3]>>>>,
@@ -329,6 +353,25 @@ def CC_X86_64_HiPE : CallingConv<[
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
+def CC_X86_64_WebKit_JS : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer/FP values are always stored in stack slots that are 8 bytes in size
+ // and 8-byte aligned.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
+
+// No explicit register is specified for the AnyReg calling convention. The
+// register allocator may assign the arguments to any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+// builds. On Release builds we fall back to the X86 C calling convention.
+def CC_X86_64_AnyReg : CallingConv<[
+ CCCustom<"CC_X86_AnyReg_Error">
+]>;
+
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
@@ -354,7 +397,7 @@ def CC_X86_32_Common : CallingConv<[
// Integer/Float values get stored in stack slots that are 4 bytes in
// size and 4-byte aligned.
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
-
+
// Doubles get 8-byte slots that are 4-byte aligned.
CCIfType<[f64], CCAssignToStack<8, 4>>,
@@ -520,6 +563,8 @@ def CC_X86_32 : CallingConv<[
def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
+ CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo<CC_X86_64_WebKit_JS>>,
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
@@ -558,11 +603,11 @@ def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
// Standard C + YMM6-15
def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
- R13, R14, R15,
+ R13, R14, R15,
(sequence "YMM%u", 6, 15))>;
def CSR_Win64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI,
- R12, R13, R14, R15,
+ R12, R13, R14, R15,
(sequence "ZMM%u", 6, 21),
K4, K5, K6, K7)>;
//Standard C + XMM 8-15
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 5d72b44..14385ed 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -840,7 +840,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
unsigned char VEX_W = 0;
// XOP: Use XOP prefix byte 0x8f instead of VEX.
- unsigned char XOP = 0;
+ bool XOP = false;
// VEX_5M (VEX m-mmmmm field):
//
@@ -850,7 +850,8 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
// 0b01000: XOP map select - 08h instructions with imm byte
- // 0b10001: XOP map select - 09h instructions with no imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
+ // 0b01010: XOP map select - 0Ah instructions with imm dword
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -882,7 +883,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
VEX_W = 1;
if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
- XOP = 1;
+ XOP = true;
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
@@ -919,11 +920,11 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
case X86II::XOP9:
VEX_5M = 0x9;
break;
- case X86II::A6: // Bypass: Not used by VEX
- case X86II::A7: // Bypass: Not used by VEX
- case X86II::TB: // Bypass: Not used by VEX
- case 0:
- break; // No prefix!
+ case X86II::XOPA:
+ VEX_5M = 0xA;
+ break;
+ case X86II::TB: // VEX_5M/VEX_PP already correct
+ break;
}
@@ -982,11 +983,14 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// FMA4:
// dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
// dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
+ CurOp++;
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, 1);
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ CurOp++;
+ }
if (X86II::isX86_64ExtendedReg(
MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
@@ -996,7 +1000,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
VEX_X = 0x0;
if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
break;
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
@@ -1006,7 +1010,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// MemAddr
// src1(VEX_4V), MemAddr
if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, 0);
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
if (X86II::isX86_64ExtendedReg(
MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
@@ -1059,8 +1063,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
case X86II::MRM6r: case X86II::MRM7r:
// MRM0r-MRM7r instructions forms:
// dst(VEX_4V), src(ModR/M), imm8
- VEX_4V = getVEXRegisterEncoding(MI, 0);
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ CurOp++;
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
break;
default: // RawFrm
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 5bc3420..97f96ab 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
+#include "X86CallingConv.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
@@ -125,6 +126,8 @@ private:
return static_cast<const X86TargetMachine *>(&TM);
}
+ bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
+
unsigned TargetMaterializeConstant(const Constant *C);
unsigned TargetMaterializeAlloca(const AllocaInst *C);
@@ -344,9 +347,126 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
return true;
}
+bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // Can't handle TLS yet.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
+ // it works...).
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalVariable *GVar =
+ dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = &X86::GR64RegClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = &X86::GR32RegClass;
+ }
+
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ SmallVector<const Value *, 32> GEPs;
+redo_gep:
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(V)) {
@@ -441,13 +561,8 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
Disp += CI->getSExtValue() * S;
break;
}
- if (isa<AddOperator>(Op) &&
- (!isa<Instruction>(Op) ||
- FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
- == FuncInfo.MBB) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add (in the same block) with a constant operand. Fold the
- // constant.
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
Disp += CI->getSExtValue() * S;
@@ -469,139 +584,43 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
goto unsupported_gep;
}
}
+
// Check for displacement overflow.
if (!isInt<32>(Disp))
break;
- // Ok, the GEP indices were covered by constant-offset and scaled-index
- // addressing. Update the address state and move on to examining the base.
+
AM.IndexReg = IndexReg;
AM.Scale = Scale;
AM.Disp = (uint32_t)Disp;
- if (X86SelectAddress(U->getOperand(0), AM))
+ GEPs.push_back(V);
+
+ if (const GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ V = GEP;
+ goto redo_gep;
+ } else if (X86SelectAddress(U->getOperand(0), AM)) {
return true;
+ }
// If we couldn't merge the gep value into this addr mode, revert back to
// our address and just match the value instead of completely failing.
AM = SavedAM;
- break;
- unsupported_gep:
- // Ok, the GEP indices weren't all covered.
- break;
- }
- }
-
- // Handle constant address.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Small)
- return false;
- // Can't handle TLS yet.
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->isThreadLocal())
- return false;
-
- // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
- // it works...).
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- if (const GlobalVariable *GVar =
- dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
- if (GVar->isThreadLocal())
- return false;
-
- // RIP-relative addresses can't have additional register operands, so if
- // we've already folded stuff into the addressing mode, just force the
- // global value into its own register, which we can use as the basereg.
- if (!Subtarget->isPICStyleRIPRel() ||
- (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
- // Okay, we've committed to selecting this global. Set up the address.
- AM.GV = GV;
-
- // Allow the subtarget to classify the global.
- unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-
- // If this reference is relative to the pic base, set it now.
- if (isGlobalRelativeToPICBase(GVFlags)) {
- // FIXME: How do we know Base.Reg is free??
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- }
-
- // Unless the ABI requires an extra load, return a direct reference to
- // the global.
- if (!isGlobalStubReference(GVFlags)) {
- if (Subtarget->isPICStyleRIPRel()) {
- // Use rip-relative addressing if we can. Above we verified that the
- // base and index registers are unused.
- assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
- AM.Base.Reg = X86::RIP;
- }
- AM.GVOpFlags = GVFlags;
+ for (SmallVectorImpl<const Value *>::reverse_iterator
+ I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
+ if (handleConstantAddresses(*I, AM))
return true;
- }
-
- // Ok, we need to do a load from a stub. If we've already loaded from
- // this stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
- unsigned LoadReg;
- if (I != LocalValueMap.end() && I->second != 0) {
- LoadReg = I->second;
- } else {
- // Issue load from stub.
- unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
- X86AddressMode StubAM;
- StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = GV;
- StubAM.GVOpFlags = GVFlags;
-
- // Prepare for inserting code in the local-value area.
- SavePoint SaveInsertPt = enterLocalValueArea();
-
- if (TLI.getPointerTy() == MVT::i64) {
- Opc = X86::MOV64rm;
- RC = &X86::GR64RegClass;
- if (Subtarget->isPICStyleRIPRel())
- StubAM.Base.Reg = X86::RIP;
- } else {
- Opc = X86::MOV32rm;
- RC = &X86::GR32RegClass;
- }
-
- LoadReg = createResultReg(RC);
- MachineInstrBuilder LoadMI =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
- addFullAddress(LoadMI, StubAM);
-
- // Ok, back to normal mode.
- leaveLocalValueArea(SaveInsertPt);
-
- // Prevent loading GV stub multiple times in same MBB.
- LocalValueMap[V] = LoadReg;
- }
-
- // Now construct the final address. Note that the Disp, Scale,
- // and Index values may already be set here.
- AM.Base.Reg = LoadReg;
- AM.GV = 0;
- return true;
- }
+ return false;
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
}
-
- // If all else fails, try to materialize the value in a register.
- if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
- if (AM.Base.Reg == 0) {
- AM.Base.Reg = getRegForValue(V);
- return AM.Base.Reg != 0;
- }
- if (AM.IndexReg == 0) {
- assert(AM.Scale == 1 && "Scale with no index!");
- AM.IndexReg = getRegForValue(V);
- return AM.IndexReg != 0;
- }
}
- return false;
+ return handleConstantAddresses(V, AM);
}
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
@@ -609,9 +628,35 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ const Instruction *I = dyn_cast<Instruction>(V);
+ // Record if the value is defined in the same basic block.
+ //
+ // This information is crucial to know whether or not folding an
+ // operand is valid.
+ // Indeed, FastISel generates or reuses a virtual register for all
+ // operands of all instructions it selects. Obviously, the definition and
+  // its uses must use the same virtual register; otherwise the produced
+ // code is incorrect.
+ // Before instruction selection, FunctionLoweringInfo::set sets the virtual
+ // registers for values that are alive across basic blocks. This ensures
+  // that the values are consistently set across basic blocks, even
+ // if different instruction selection mechanisms are used (e.g., a mix of
+ // SDISel and FastISel).
+ // For values local to a basic block, the instruction selection process
+ // generates these virtual registers with whatever method is appropriate
+ // for its needs. In particular, FastISel and SDISel do not share the way
+ // local virtual registers are set.
+  // Therefore, it is impossible (or at least unsafe) to share values
+  // between basic blocks unless they use the same instruction selection
+  // method, which is not guaranteed for X86.
+  // Moreover, things like hasOneUse could not be used accurately if we
+  // allowed references to values across basic blocks, whereas they are not
+ // alive across basic blocks initially.
+ bool InMBB = true;
+ if (I) {
Opcode = I->getOpcode();
U = I;
+ InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
@@ -620,18 +665,22 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
switch (Opcode) {
default: break;
case Instruction::BitCast:
- // Look past bitcasts.
- return X86SelectCallAddress(U->getOperand(0), AM);
+ // Look past bitcasts if its operand is in the same BB.
+ if (InMBB)
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
case Instruction::IntToPtr:
- // Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ // Look past no-op inttoptrs if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return X86SelectCallAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
- // Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ // Look past no-op ptrtoints if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getType()) == TLI.getPointerTy())
return X86SelectCallAddress(U->getOperand(0), AM);
break;
}
@@ -1026,7 +1075,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
return false;
// Handle zero-extension from i1 to i8, which is common.
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()).getSimpleVT();
+ MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
if (SrcVT.SimpleTy == MVT::i1) {
// Set the high bits to zero.
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
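One way to read the X86SelectAddress restructuring above: nested GEPs are now peeled iteratively (the GEPs vector plus the redo_gep label) rather than by recursion, and if the innermost base cannot be selected, the recorded GEPs are retried innermost first through handleConstantAddresses. A schematic stand-alone sketch of that control flow with stand-in types; it is not the FastISel code itself.

#include <vector>

// Stand-ins for the real types: a node is "foldable" if its offsets can be
// absorbed into the address, and Base is the operand it is built on.
struct AddrNode {
  const AddrNode *Base;
  bool FoldableOffsets;
};

// handleConstant plays the role handleConstantAddresses plays in the patch:
// it is the fallback used when a node must become the base register itself.
bool selectAddressSketch(const AddrNode *N,
                         bool (*handleConstant)(const AddrNode *)) {
  std::vector<const AddrNode *> Peeled;
  // Peel foldable nodes iteratively; corresponds to GEPs.push_back + redo_gep.
  while (N && N->FoldableOffsets) {
    Peeled.push_back(N);
    N = N->Base;
  }
  // Try the innermost base first.
  if (N && handleConstant(N))
    return true;
  // Otherwise retry the peeled nodes, innermost first (reverse order).
  for (std::vector<const AddrNode *>::const_reverse_iterator
         I = Peeled.rbegin(), E = Peeled.rend(); I != E; ++I)
    if (handleConstant(*I))
      return true;
  return false;
}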
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index d21cb8a..38a8351 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -125,6 +125,15 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
// which requires isImm() to be true
return 0;
}
+ break;
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB:
+ if (MI->getOperand(1).getReg() != MI->getOperand(2).getReg()) {
+      // If src1 != src2, then convertToThreeAddress will
+      // need to create a virtual register, which we cannot do
+ // after register allocation.
+ return 0;
+ }
}
return TII->convertToThreeAddress(MFI, MBBI, 0);
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index b994e67..a06ba9d 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -365,270 +365,13 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
}
}
-/// getCompactUnwindRegNum - Get the compact unwind number for a given
-/// register. The number corresponds to the enum lists in
-/// compact_unwind_encoding.h.
-static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) {
- static const uint16_t CU32BitRegs[] = {
- X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
- };
- static const uint16_t CU64BitRegs[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
- const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs;
- for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
- if (*CURegs == Reg)
- return Idx;
-
- return -1;
-}
-
-// Number of registers that can be saved in a compact unwind encoding.
-#define CU_NUM_SAVED_REGS 6
-
-/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
-/// used with frameless stacks. It is passed the number of registers to be saved
-/// and an array of the registers saved.
-static uint32_t
-encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
- unsigned RegCount, bool Is64Bit) {
- // The saved registers are numbered from 1 to 6. In order to encode the order
- // in which they were saved, we re-number them according to their place in the
- // register order. The re-numbering is relative to the last re-numbered
- // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
- //
- // Orig Re-Num
- // ---- ------
- // 6 6
- // 2 2
- // 4 3
- // 5 3
- //
- for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
- int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit);
- if (CUReg == -1) return ~0U;
- SavedRegs[i] = CUReg;
- }
-
- // Reverse the list.
- std::swap(SavedRegs[0], SavedRegs[5]);
- std::swap(SavedRegs[1], SavedRegs[4]);
- std::swap(SavedRegs[2], SavedRegs[3]);
-
- uint32_t RenumRegs[CU_NUM_SAVED_REGS];
- for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
- unsigned Countless = 0;
- for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
- if (SavedRegs[j] < SavedRegs[i])
- ++Countless;
-
- RenumRegs[i] = SavedRegs[i] - Countless - 1;
- }
-
- // Take the renumbered values and encode them into a 10-bit number.
- uint32_t permutationEncoding = 0;
- switch (RegCount) {
- case 6:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 5:
- permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
- + 6 * RenumRegs[3] + 2 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 4:
- permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
- + 3 * RenumRegs[4] + RenumRegs[5];
- break;
- case 3:
- permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 2:
- permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
- break;
- case 1:
- permutationEncoding |= RenumRegs[5];
- break;
- }
-
- assert((permutationEncoding & 0x3FF) == permutationEncoding &&
- "Invalid compact register encoding!");
- return permutationEncoding;
-}
-
-/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
-/// compact encoding with a frame pointer.
-static uint32_t
-encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
- bool Is64Bit) {
- // Encode the registers in the order they were saved, 3-bits per register. The
- // registers are numbered from 1 to CU_NUM_SAVED_REGS.
- uint32_t RegEnc = 0;
- for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) {
- unsigned Reg = SavedRegs[I];
- if (Reg == 0) continue;
-
- int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit);
- if (CURegNum == -1) return ~0U;
-
- // Encode the 3-bit register number in order, skipping over 3-bits for each
- // register.
- RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
- }
-
- assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!");
- return RegEnc;
-}
-
-uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
- unsigned StackPtr = RegInfo->getStackRegister();
-
- bool Is64Bit = STI.is64Bit();
- bool HasFP = hasFP(MF);
-
- unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
- unsigned SavedRegIdx = 0;
-
- unsigned OffsetSize = (Is64Bit ? 8 : 4);
-
- unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
- unsigned PushInstrSize = 1;
- unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
- unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
- unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
-
- unsigned StackDivide = (Is64Bit ? 8 : 4);
-
- unsigned InstrOffset = 0;
- unsigned StackAdjust = 0;
- unsigned StackSize = 0;
-
- MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
- bool ExpectEnd = false;
- for (MachineBasicBlock::iterator
- MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
- MachineInstr &MI = *MBBI;
- unsigned Opc = MI.getOpcode();
- if (Opc == X86::PROLOG_LABEL) continue;
- if (!MI.getFlag(MachineInstr::FrameSetup)) break;
-
- // We don't expect any more prolog instructions.
- if (ExpectEnd) return CU::UNWIND_MODE_DWARF;
-
- if (Opc == PushInstr) {
- // If there are too many saved registers, we cannot use compact encoding.
- if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF;
-
- unsigned Reg = MI.getOperand(0).getReg();
- if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) {
- ExpectEnd = true;
- continue;
- }
-
- SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
- StackAdjust += OffsetSize;
- InstrOffset += PushInstrSize;
- } else if (Opc == MoveInstr) {
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
-
- if (DstReg != FramePtr || SrcReg != StackPtr)
- return CU::UNWIND_MODE_DWARF;
-
- StackAdjust = 0;
- memset(SavedRegs, 0, sizeof(SavedRegs));
- SavedRegIdx = 0;
- InstrOffset += MoveInstrSize;
- } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
- if (StackSize)
- // We already have a stack size.
- return CU::UNWIND_MODE_DWARF;
-
- if (!MI.getOperand(0).isReg() ||
- MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
- MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
- // We need this to be a stack adjustment pointer. Something like:
- //
- // %RSP<def> = SUB64ri8 %RSP, 48
- return CU::UNWIND_MODE_DWARF;
-
- StackSize = MI.getOperand(2).getImm() / StackDivide;
- SubtractInstrIdx += InstrOffset;
- ExpectEnd = true;
- }
- }
-
- // Encode that we are using EBP/RBP as the frame pointer.
- uint32_t CompactUnwindEncoding = 0;
- StackAdjust /= StackDivide;
- if (HasFP) {
- if ((StackAdjust & 0xFF) != StackAdjust)
- // Offset was too big for compact encoding.
- return CU::UNWIND_MODE_DWARF;
-
- // Get the encoding of the saved registers when we have a frame pointer.
- uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
- if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
-
- CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
- CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
- CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
- } else {
- ++StackAdjust;
- uint32_t TotalStackSize = StackAdjust + StackSize;
- if ((TotalStackSize & 0xFF) == TotalStackSize) {
- // Frameless stack with a small stack size.
- CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
-
- // Encode the stack size.
- CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
- } else {
- if ((StackAdjust & 0x7) != StackAdjust)
- // The extra stack adjustments are too big for us to handle.
- return CU::UNWIND_MODE_DWARF;
-
- // Frameless stack with an offset too large for us to encode compactly.
- CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
-
- // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
- // instruction.
- CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
-
- // Encode any extra stack adjustments (done via push instructions).
- CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
- }
-
- // Encode the number of registers saved.
- CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
-
- // Get the encoding of the saved registers when we don't have a frame
- // pointer.
- uint32_t RegEnc =
- encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
- Is64Bit);
- if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
-
- // Encode the register encoding.
- CompactUnwindEncoding |=
- RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
- }
-
- return CompactUnwindEncoding;
-}
-
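// A compilable sketch, assuming the CU::UNWIND_* constants from the header
// block removed further below, of how getCompactUnwindEncoding above packs the
// frameless "small stack" case: the mode lives in the top byte, the total
// stack size (in pointer-sized slots) in bits 23:16, the number of pushed
// registers in bits 12:10, and the 10-bit register permutation in bits 9:0.
// The function name is illustrative, not part of the LLVM API.
#include <cstdint>

static uint32_t buildFramelessStackImmdWord(uint32_t StackSlots,
                                            uint32_t SavedRegCount,
                                            uint32_t Permutation) {
  const uint32_t UNWIND_MODE_STACK_IMMD = 0x02000000;   // mode byte
  uint32_t Enc = UNWIND_MODE_STACK_IMMD;
  Enc |= (StackSlots & 0xFF) << 16;    // stack size / slot size, max 255 slots
  Enc |= (SavedRegCount & 0x7) << 10;  // up to six saved registers
  Enc |= Permutation & 0x3FF;          // permutation from the frameless encoder
  return Enc;
}
// In the frame-pointer case the same 23:16 field instead carries the stack
// adjustment, and the low 15 bits carry the with-frame register list, per the
// UNWIND_MODE_BP_FRAME branch above.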
/// usesTheStack - This function checks if any of the users of EFLAGS
/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
/// to use the stack, and if we don't adjust the stack we clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
-static bool usesTheStack(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
+static bool usesTheStack(const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS),
re = MRI.reg_end(); ri != re; ++ri)
@@ -863,7 +606,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
- if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+ if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetEnvMacho()) {
const char *StackProbeSymbol;
bool isSPUpdateNeeded = false;
@@ -964,11 +707,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (PushedRegs)
emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
}
-
- // Darwin 10.7 and greater has support for compact unwind encoding.
- if (STI.getTargetTriple().isMacOSX() &&
- !STI.getTargetTriple().isMacOSXVersionLT(10, 7))
- MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF));
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 6e309d8..3d3b011 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -20,32 +20,6 @@
namespace llvm {
-namespace CU {
-
- /// Compact unwind encoding values.
- enum CompactUnwindEncodings {
- /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
- /// the return address, then [RE]SP is moved to [RE]BP.
- UNWIND_MODE_BP_FRAME = 0x01000000,
-
- /// A frameless function with a small constant stack size.
- UNWIND_MODE_STACK_IMMD = 0x02000000,
-
- /// A frameless function with a large constant stack size.
- UNWIND_MODE_STACK_IND = 0x03000000,
-
- /// No compact unwind encoding is available.
- UNWIND_MODE_DWARF = 0x04000000,
-
- /// Mask for encoding the frame registers.
- UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
-
- /// Mask for encoding the frameless registers.
- UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
- };
-
-} // end CU namespace
-
class MCSymbol;
class X86TargetMachine;
@@ -91,7 +65,6 @@ public:
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
- uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9465420..36d1690 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -79,7 +79,8 @@ namespace {
}
bool hasBaseOrIndexReg() const {
- return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
+ return BaseType == FrameIndexBase ||
+ IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
/// isRIPRelative - Return true if this addressing mode is already RIP
@@ -183,7 +184,7 @@ namespace {
SDNode *Select(SDNode *N);
SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
- SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
+ SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
@@ -491,8 +492,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
- EVT SrcVT = N->getOperand(0).getValueType();
- EVT DstVT = N->getValueType(0);
+ MVT SrcVT = N->getOperand(0).getSimpleValueType();
+ MVT DstVT = N->getSimpleValueType(0);
// If any of the sources are vectors, no fp stack involved.
if (SrcVT.isVector() || DstVT.isVector())
@@ -519,7 +520,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
- EVT MemVT;
+ MVT MemVT;
if (N->getOpcode() == ISD::FP_ROUND)
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
@@ -783,7 +784,7 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
Mask != (0xffu << ScaleLog))
return true;
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue Eight = DAG.getConstant(8, MVT::i8);
SDValue NewMask = DAG.getConstant(0xff, VT);
@@ -831,7 +832,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
return true;
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
@@ -904,7 +905,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// Scale the leading zero count down based on the actual size of the value.
// Also scale it down based on the size of the shift.
- MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt;
+ MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
// The final check is to ensure that any masked out high bits of X are
// already known to be zero. Otherwise, the mask has a semantic impact
@@ -914,23 +915,23 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// replace them with zero extensions cheaply if necessary.
bool ReplacingAnyExtend = false;
if (X.getOpcode() == ISD::ANY_EXTEND) {
- unsigned ExtendBits =
- X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits();
+ unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
+ X.getOperand(0).getSimpleValueType().getSizeInBits();
// Assume that we'll replace the any-extend with a zero-extend, and
// narrow the search to the extended value.
X = X.getOperand(0);
MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
ReplacingAnyExtend = true;
}
- APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
- MaskLZ);
+ APInt MaskedHighBits =
+ APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
if (MaskedHighBits != KnownZero) return true;
// We've identified a pattern that can be transformed into a single shift
// and an addressing mode. Make it so.
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
if (ReplacingAnyExtend) {
assert(X.getValueType() != VT);
// We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
@@ -1059,7 +1060,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// We only handle up to 64-bit values here as those are what matter for
// addressing mode optimizations.
- if (X.getValueSizeInBits() > 64) break;
+ if (X.getSimpleValueType().getSizeInBits() > 64) break;
// The mask used for the transform is expected to be post-shift, but we
// found the shift first so just apply the shift to the mask before passing
@@ -1244,7 +1245,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// We only handle up to 64-bit values here as those are what matter for
// addressing mode optimizations.
- if (X.getValueSizeInBits() > 64) break;
+ if (X.getSimpleValueType().getSizeInBits() > 64) break;
if (!isa<ConstantSDNode>(N.getOperand(1)))
break;
@@ -1323,7 +1324,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
if (MatchAddress(N, AM))
return false;
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base_Reg.getNode())
AM.Base_Reg = CurDAG->getRegister(0, VT);
@@ -1465,7 +1466,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
assert (T == AM.Segment);
AM.Segment = Copy;
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base_Reg.getNode())
@@ -1706,7 +1707,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
// + non-empty, otherwise.
static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
SDLoc dl,
- enum AtomicOpc &Op, EVT NVT,
+ enum AtomicOpc &Op, MVT NVT,
SDValue Val) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
int64_t CNVal = CN->getSExtValue();
@@ -1753,7 +1754,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
return Val;
}
-SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
+SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
@@ -1793,7 +1794,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
unsigned Opc = 0;
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: return 0;
case MVT::i8:
if (isCN)
@@ -2047,7 +2048,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
}
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
- EVT NVT = Node->getValueType(0);
+ MVT NVT = Node->getSimpleValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
SDLoc dl(Node);
@@ -2056,6 +2057,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Node->isMachineOpcode()) {
DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
+ Node->setNodeId(-1);
return NULL; // Already selected.
}
@@ -2187,7 +2189,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
unsigned ShlOp, Op;
- EVT CstVT = NVT;
+ MVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
// TODO: AND32ri is the same as AND64ri32 with zext imm.
@@ -2202,7 +2204,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (NVT == CstVT)
break;
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i32:
assert(CstVT == MVT::i8);
@@ -2239,7 +2241,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
unsigned LoReg;
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
@@ -2268,7 +2270,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
bool isSigned = Opcode == ISD::SMUL_LOHI;
bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
@@ -2278,7 +2280,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
@@ -2415,7 +2417,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
@@ -2423,7 +2425,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
} else {
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
@@ -2434,7 +2436,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned LoReg, HiReg, ClrReg;
unsigned SExtOpcode;
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; ClrReg = HiReg = X86::AH;
@@ -2489,7 +2491,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
} else {
// Zero out the high part, effectively zero extending the input.
SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
SDValue(CurDAG->getMachineNode(
@@ -2609,7 +2611,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// On x86-32, only the ABCD registers have 8-bit subregisters.
if (!Subtarget->is64Bit()) {
const TargetRegisterClass *TRC;
- switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ switch (N0.getSimpleValueType().SimpleTy) {
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
default: llvm_unreachable("Unsupported TEST operand type!");
@@ -2644,7 +2646,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Put the value in an ABCD register.
const TargetRegisterClass *TRC;
- switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ switch (N0.getSimpleValueType().SimpleTy) {
case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9ffe29f..081c558 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,6 +16,7 @@
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86.h"
+#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
@@ -91,7 +92,7 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
VecIdx);
return Result;
-
+
}
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
@@ -631,7 +632,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ if (Subtarget->isOSWindows() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else if (TM.Options.EnableSegmentedStacks)
@@ -1150,9 +1151,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
-
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1160,7 +1158,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
@@ -1193,10 +1190,16 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
@@ -1330,7 +1333,16 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
-
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
+ }
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
@@ -1390,6 +1402,9 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::AND, MVT::v8i64, Legal);
setOperationAction(ISD::OR, MVT::v8i64, Legal);
setOperationAction(ISD::XOR, MVT::v8i64, Legal);
+ setOperationAction(ISD::AND, MVT::v16i32, Legal);
+ setOperationAction(ISD::OR, MVT::v16i32, Legal);
+ setOperationAction(ISD::XOR, MVT::v16i32, Legal);
// Custom lower several nodes.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
@@ -1409,14 +1424,6 @@ void X86TargetLowering::resetOperationActions() {
if (!VT.is512BitVector())
continue;
- if (VT != MVT::v8i64) {
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, MVT::v8i64);
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, MVT::v8i64);
- }
if ( EltSize >= 32) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
@@ -1434,8 +1441,6 @@ void X86TargetLowering::resetOperationActions() {
if (!VT.is512BitVector())
continue;
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, MVT::v8i64);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
}
@@ -1452,6 +1457,7 @@ void X86TargetLowering::resetOperationActions() {
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@@ -1541,7 +1547,16 @@ void X86TargetLowering::resetOperationActions() {
}
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector()) return MVT::i8;
+ if (!VT.isVector())
+ return MVT::i8;
+
+ const TargetMachine &TM = getTargetMachine();
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512())
+ switch(VT.getVectorNumElements()) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ }
+
return VT.changeVectorElementTypeToInteger();
}
@@ -1750,6 +1765,13 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
return true;
}
+bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ assert(SrcAS != DestAS && "Expected different address spaces!");
+
+ return SrcAS < 256 && DestAS < 256;
+}
+
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1767,6 +1789,11 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
+const uint16_t *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
+ static const uint16_t ScratchRegs[] = { X86::R11, 0 };
+ return ScratchRegs;
+}
+
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -3532,7 +3559,7 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
-static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
+static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
if (VT == MVT::v4f32 || VT == MVT::v4i32 )
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -3542,7 +3569,7 @@ static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
@@ -3571,7 +3598,7 @@ static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
@@ -3600,14 +3627,14 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
+static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
(VT.is256BitVector() && !Subtarget->hasInt256()))
return false;
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLanes = VT.is512BitVector() ? 1: VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
// Do not handle 64-bit element shuffles with palignr.
@@ -3690,10 +3717,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
/// specifies a shuffle of elements that is suitable for input to 128/256-bit
/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
/// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
- bool Commuted = false) {
- if (!HasFp256 && VT.is256BitVector())
- return false;
+static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
@@ -3702,6 +3726,10 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
if (NumLaneElems != 2 && NumLaneElems != 4)
return false;
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ bool symetricMaskRequired =
+ (VT.getSizeInBits() >= 256) && (EltSize == 32);
+
// VSHUFPSY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
@@ -3721,6 +3749,7 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
//
// DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
+ SmallVector<int, 4> MaskVal(NumLaneElems, -1);
unsigned HalfLaneElems = NumLaneElems/2;
for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
for (unsigned i = 0; i != NumLaneElems; ++i) {
@@ -3731,9 +3760,13 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
// For VSHUFPSY, the mask of the second half must be the same as the
// first but with the appropriate offsets. This works in the same way as
// VPERMILPS works with masks.
- if (NumElems != 8 || l == 0 || Mask[i] < 0)
+ if (!symetricMaskRequired || Idx < 0)
continue;
- if (!isUndefOrEqual(Idx, Mask[i]+l))
+ if (MaskVal[i] < 0) {
+ MaskVal[i] = Idx - l;
+ continue;
+ }
+ if ((signed)(Idx - l) != MaskVal[i])
return false;
}
}
@@ -3743,7 +3776,7 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -3762,7 +3795,7 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
-static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -3779,7 +3812,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
-static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -3801,7 +3834,7 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
-static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -3827,7 +3860,7 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
static
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
SDLoc dl(SVOp);
if (VT != MVT::v8i32 && VT != MVT::v8f32)
@@ -3870,70 +3903,92 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
+static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
- unsigned NumElts = VT.getVectorNumElements();
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "Unsupported vector type for unpckh");
+ assert(VT.getSizeInBits() >= 128 &&
+ "Unsupported vector type for unpckl");
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes;
+ unsigned NumOf256BitLanes;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is256BitVector()) {
+ if (NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
+ NumLanes = 2;
+ NumOf256BitLanes = 1;
+ } else if (VT.is512BitVector()) {
+ assert(VT.getScalarType().getSizeInBits() >= 32 &&
+ "Unsupported vector type for unpckl");
+ NumLanes = 2;
+ NumOf256BitLanes = 2;
+ } else {
+ NumLanes = 1;
+ NumOf256BitLanes = 1;
+ }
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
+ unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
+ unsigned NumLaneElts = NumEltsInStride/NumLanes;
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (!isUndefOrEqual(BitI1, NumElts))
+ for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
+ for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
+ for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l256*NumEltsInStride+l+i];
+ int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
+ if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts))
+ if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ return false;
+ if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
}
-
return true;
}
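// For reference, a self-contained sketch (not the LLVM helper) of the pattern
// the 128/256-bit path of isUNPCKLMask accepts: within every 128-bit lane the
// mask interleaves the lane's low-half elements of the first source with the
// matching elements of the second source, which sit NumElts further along in
// shuffle-mask numbering. Printing the v8i32 case gives 0 8 1 9 4 12 5 13.
#include <cstdio>
#include <vector>

static std::vector<int> unpcklReferenceMask(unsigned NumElts, unsigned EltBits) {
  unsigned NumLanes = (NumElts * EltBits) / 128;  // independent 128-bit lanes
  unsigned NumLaneElts = NumElts / NumLanes;
  std::vector<int> Mask;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
      Mask.push_back(int(j));            // element j of the first source
      Mask.push_back(int(j + NumElts));  // element j of the second source
    }
  return Mask;
}

int main() {
  for (int M : unpcklReferenceMask(8, 32))  // v8i32 under AVX
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}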
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
+static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
- unsigned NumElts = VT.getVectorNumElements();
-
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckh");
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes;
+ unsigned NumOf256BitLanes;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is256BitVector()) {
+ if (NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
+ NumLanes = 2;
+ NumOf256BitLanes = 1;
+ } else if (VT.is512BitVector()) {
+ assert(VT.getScalarType().getSizeInBits() >= 32 &&
+ "Unsupported vector type for unpckh");
+ NumLanes = 2;
+ NumOf256BitLanes = 2;
+ } else {
+ NumLanes = 1;
+ NumOf256BitLanes = 1;
+ }
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
+ unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
+ unsigned NumLaneElts = NumEltsInStride/NumLanes;
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (isUndefOrEqual(BitI1, NumElts))
+ for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
+ for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
+ for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l256*NumEltsInStride+l+i];
+ int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
+ if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
- } else {
- if (!isUndefOrEqual(BitI1, j+NumElts))
+ if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ return false;
+ if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
@@ -3944,10 +3999,12 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
bool Is256BitVec = VT.is256BitVector();
+ if (VT.is512BitVector())
+ return false;
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
@@ -3985,9 +4042,12 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is512BitVector())
+ return false;
+
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
@@ -4040,7 +4100,7 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from the
/// second half of V2.
-static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
if (!HasFp256 || !VT.is256BitVector())
return false;
@@ -4072,7 +4132,7 @@ static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
unsigned HalfSize = VT.getVectorNumElements()/2;
@@ -4093,6 +4153,44 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
return (FstHalf | (SndHalf << 4));
}
+// Symmetric in-lane mask. Each lane has 4 elements (for imm8).
+static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize < 32)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ Imm8 = 0;
+ if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ Imm8 |= Mask[i] << (i*2);
+ }
+ return true;
+ }
+
+ unsigned LaneSize = 4;
+ SmallVector<int, 4> MaskVal(LaneSize, -1);
+
+ for (unsigned l = 0; l != NumElts; l += LaneSize) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
+ return false;
+ if (Mask[i+l] < 0)
+ continue;
+ if (MaskVal[i] < 0) {
+ MaskVal[i] = Mask[i+l] - l;
+ Imm8 |= MaskVal[i] << (i*2);
+ continue;
+ }
+ if (Mask[i+l] != (signed)(MaskVal[i]+l))
+ return false;
+ }
+ }
+ return true;
+}
+
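// A worked example, as a standalone sketch, of the immediate isPermImmMask
// builds for the symmetric 256-bit case: the v8f32 mask <1,0,3,2, 5,4,7,6>
// repeats the in-lane pattern <1,0,3,2> in both 128-bit lanes, so packing two
// bits per element gives 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xB1, the familiar
// "swap adjacent pairs" VPERMILPS immediate. The helper name is illustrative.
#include <cassert>

static unsigned packLanePattern(const int Pattern[4]) {
  unsigned Imm8 = 0;
  for (unsigned i = 0; i != 4; ++i)
    Imm8 |= unsigned(Pattern[i]) << (i * 2);  // two bits per lane element
  return Imm8;
}

int main() {
  const int SwapPairs[4] = {1, 0, 3, 2};
  assert(packLanePattern(SwapPairs) == 0xB1);
  return 0;
}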
/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching is different depending on whether the underlying
@@ -4100,38 +4198,39 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
- if (!HasFp256)
+static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (VT.getSizeInBits() < 256 || EltSize < 32)
return false;
-
+ bool symetricMaskRequired = (EltSize == 32);
unsigned NumElts = VT.getVectorNumElements();
- // Only match 256-bit with 32/64-bit types
- if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
- return false;
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned LaneSize = NumElts/NumLanes;
+ // 2 or 4 elements in one lane
+
+ SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
- if (NumElts != 8 || l == 0)
- continue;
- // VPERMILPS handling
- if (Mask[i] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
- return false;
+ if (symetricMaskRequired) {
+ if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
+ ExpectedMaskVal[i] = Mask[i+l] - l;
+ continue;
+ }
+ if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
+ return false;
+ }
}
}
-
return true;
}
/// isCommutedMOVLMask - Returns true if the shuffle mask is the reverse of what
/// x86 movss wants: the lowest element must be the lowest element of vector 2,
/// and the other elements must come from vector 1 in order.
-static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
+static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
if (!VT.is128BitVector())
return false;
@@ -4155,7 +4254,7 @@ static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
-static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
+static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if (!Subtarget->hasSSE3())
return false;
@@ -4163,7 +4262,8 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
if ((VT.is128BitVector() && NumElems != 4) ||
- (VT.is256BitVector() && NumElems != 8))
+ (VT.is256BitVector() && NumElems != 8) ||
+ (VT.is512BitVector() && NumElems != 16))
return false;
// "i+1" is the value the indexed mask element must have
@@ -4178,7 +4278,7 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
-static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
+static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if (!Subtarget->hasSSE3())
return false;
@@ -4186,7 +4286,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
if ((VT.is128BitVector() && NumElems != 4) ||
- (VT.is256BitVector() && NumElems != 8))
+ (VT.is256BitVector() && NumElems != 8) ||
+ (VT.is512BitVector() && NumElems != 16))
return false;
// "i" is the value the indexed mask element must have
@@ -4201,7 +4302,7 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
if (!HasFp256 || !VT.is256BitVector())
return false;
@@ -4221,7 +4322,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// version of MOVDDUP.
-static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -4247,7 +4348,7 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % vecWidth == 0;
@@ -4265,7 +4366,7 @@ static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % vecWidth == 0;
@@ -4292,9 +4393,9 @@ bool X86::isVEXTRACT256Index(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
/// Handles 128-bit and 256-bit.
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert((VT.getSizeInBits() >= 128) &&
"Unsupported vector type for PSHUF/SHUFP");
// Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
@@ -4303,10 +4404,10 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
- assert((NumLaneElts == 2 || NumLaneElts == 4) &&
- "Only supports 2 or 4 elements per lane");
+ assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
+ "Only supports 2, 4 or 8 elements per lane");
- unsigned Shift = (NumLaneElts == 4) ? 1 : 0;
+ unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
unsigned Mask = 0;
for (unsigned i = 0; i != NumElts; ++i) {
int Elt = N->getMaskElt(i);
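// The loop above packs one field per destination element; for the common
// v4i32 case (four elements per lane, Shift == 1, so two bits per field) the
// arithmetic reduces to the sketch below. It assumes in-range mask values, and
// the helper name is illustrative rather than LLVM's.
#include <cassert>

static unsigned pshufdImmediate(const int Mask[4]) {
  unsigned Imm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    if (Mask[i] < 0)
      continue;                               // undef elements contribute 0
    Imm |= unsigned(Mask[i] & 0x3) << (i * 2);
  }
  return Imm;
}

int main() {
  const int SwapHalves[4] = {2, 3, 0, 1};     // swap the two 64-bit halves
  assert(pshufdImmediate(SwapHalves) == 0x4E);
  return 0;
}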
@@ -4322,7 +4423,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4346,7 +4447,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4370,11 +4471,12 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
- unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
+ MVT VT = SVOp->getSimpleValueType(0);
+ unsigned EltSize = VT.is512BitVector() ? 1 :
+ VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
int Val = 0;
@@ -4399,7 +4501,7 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
+ MVT VecVT = N->getOperand(0).getSimpleValueType();
MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
@@ -4414,7 +4516,7 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- MVT VecVT = N->getValueType(0).getSimpleVT();
+ MVT VecVT = N->getSimpleValueType(0);
MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
@@ -4449,27 +4551,6 @@ unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
-/// getShuffleCLImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
-/// Handles 256-bit.
-static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
-
- unsigned NumElts = VT.getVectorNumElements();
-
- assert((VT.is256BitVector() && NumElts == 4) &&
- "Unsupported vector type for VPERMQ/VPERMPD");
-
- unsigned Mask = 0;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt < 0)
- continue;
- Mask |= Elt << (i*2);
- }
-
- return Mask;
-}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
@@ -4484,7 +4565,7 @@ bool X86::isZeroNode(SDValue Elt) {
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
@@ -4506,7 +4587,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
-static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
+static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
if (VT.getVectorNumElements() != 4)
@@ -4563,7 +4644,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) {
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
- ArrayRef<int> Mask, EVT VT) {
+ ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -4659,6 +4740,11 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
array_lengthof(Ops));
}
+ } else if (VT.is512BitVector()) { // AVX-512
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+ Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
} else
llvm_unreachable("Unexpected vector type");
@@ -4715,7 +4801,7 @@ static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
}
/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
-static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
+static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
@@ -4727,7 +4813,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
}
/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
-static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
+static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
@@ -4743,7 +4829,7 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
// Generate shuffles which repeat i16 and i8 several times until they can be
// represented by v4f32 and then be manipulated by target supported shuffles.
static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
- EVT VT = V.getValueType();
+ MVT VT = V.getSimpleValueType();
int NumElems = VT.getVectorNumElements();
SDLoc dl(V);
@@ -4761,7 +4847,7 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
/// getLegalSplat - Generate a legal splat with supported x86 shuffles
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
- EVT VT = V.getValueType();
+ MVT VT = V.getSimpleValueType();
SDLoc dl(V);
if (VT.is128BitVector()) {
@@ -4787,7 +4873,7 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
/// PromoteSplat - Splat is promoted to target supported vector shuffles.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- EVT SrcVT = SV->getValueType(0);
+ MVT SrcVT = SV->getSimpleValueType(0);
SDValue V1 = SV->getOperand(0);
SDLoc dl(SV);
@@ -4810,7 +4896,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// instruction because the target has no such instruction. Generate shuffles
// which repeat i16 and i8 several times until they fit in i32, and then can
// be manipulated by target supported shuffles.
- EVT EltVT = SrcVT.getVectorElementType();
+ MVT EltVT = SrcVT.getVectorElementType();
if (EltVT == MVT::i8 || EltVT == MVT::i16)
V1 = PromoteSplati8i16(V1, DAG, EltNo);
@@ -4832,7 +4918,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool IsZero,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT VT = V2.getValueType();
+ MVT VT = V2.getSimpleValueType();
SDValue V1 = IsZero
? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
@@ -4950,7 +5036,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
- MVT ShufVT = V.getValueType().getSimpleVT();
+ MVT ShufVT = V.getSimpleValueType();
unsigned NumElems = ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
bool IsUnary;
@@ -5048,7 +5134,8 @@ bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
/// logical left shift of a vector.
static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumElems =
+ SVOp->getSimpleValueType(0).getVectorNumElements();
unsigned NumZeros = getNumOfConsecutiveZeros(
SVOp, NumElems, false /* check zeros from right */, DAG,
SVOp->getMaskElt(0));
@@ -5082,7 +5169,8 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
/// logical left shift of a vector.
static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumElems =
+ SVOp->getSimpleValueType(0).getVectorNumElements();
unsigned NumZeros = getNumOfConsecutiveZeros(
SVOp, NumElems, true /* check zeros from left */, DAG,
NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
@@ -5118,7 +5206,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
// Although the logic below supports any bitwidth, there are no
// shift instructions which handle more than 128-bit vectors.
- if (!SVOp->getValueType(0).is128BitVector())
+ if (!SVOp->getSimpleValueType(0).is128BitVector())
return false;
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
@@ -5223,9 +5311,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
}
-SDValue
-X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
- SelectionDAG &DAG) const {
+static SDValue
+LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
@@ -5288,7 +5375,10 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, false, 0);
- SmallVector<int, 8> Mask(NumElems, EltNo);
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumElems; ++i)
+ Mask.push_back(EltNo);
+
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
@@ -5305,7 +5395,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
- SDLoc &DL, SelectionDAG &DAG) {
+ SDLoc &DL, SelectionDAG &DAG,
+ bool isAfterLegalize) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
@@ -5341,7 +5432,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
// load of the entire vector width starting at the base pointer. If we found
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
+
+ if (isAfterLegalize &&
+ !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
SDValue NewLd = SDValue();
+
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
@@ -5398,12 +5495,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
/// a scalar load, or a constant.
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
-SDValue
-X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
+ SelectionDAG &DAG) {
if (!Subtarget->hasFp256())
return SDValue();
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
@@ -5450,7 +5547,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
// Use the register form of the broadcast instruction available on AVX2.
- if (VT.is256BitVector())
+ if (VT.getSizeInBits() >= 256)
Sc = Extract128BitVector(Sc, 0, DAG, dl);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
}
@@ -5492,7 +5589,8 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
assert(C && "Invalid constant type");
- SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
@@ -5528,12 +5626,12 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
+static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
// Skip if insert_vec_elt is not supported.
- if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
return SDValue();
SDLoc DL(Op);
@@ -5606,7 +5704,7 @@ X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
@@ -5645,13 +5743,16 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
DAG.getConstant(Immediate, MVT::i16));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0));
}
- if (!isSplatVector(Op.getNode()))
- llvm_unreachable("Unsupported predicate operation");
-
+ // Splat vector (with undefs)
SDValue In = Op.getOperand(0);
+ for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) {
+ if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ llvm_unreachable("Unsupported predicate operation");
+ }
+
SDValue EFLAGS, X86CC;
if (In.getOpcode() == ISD::SETCC) {
SDValue Op0 = In.getOperand(0);
@@ -5679,7 +5780,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
// res = allOnes ### CMOVNE -1, %res
// else
// res = allZero
- MVT InVT = In.getValueType().getSimpleVT();
+ MVT InVT = In.getSimpleValueType();
SDValue Bit1 = DAG.getNode(ISD::AND, dl, InVT, In, DAG.getConstant(1, InVT));
EFLAGS = EmitTest(Bit1, X86::COND_NE, DAG);
X86CC = DAG.getConstant(X86::COND_NE, MVT::i8);
@@ -5708,7 +5809,7 @@ SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
@@ -5720,7 +5821,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
- if (VT == MVT::v4i32 || VT == MVT::v8i32)
+ if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
return getZeroVector(VT, Subtarget, DAG, dl);
@@ -5733,10 +5834,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
+ if (!VT.is512BitVector())
+ return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
- SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
if (Broadcast.getNode())
return Broadcast;
@@ -5815,7 +5917,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.is256BitVector()) {
+ if (VT.is256BitVector() || VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
@@ -5980,7 +6082,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
V[i] = Op.getOperand(i);
// Check for elements which are consecutive loads.
- SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
if (LD.getNode())
return LD;
@@ -6044,7 +6146,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
- MVT ResVT = Op.getValueType().getSimpleVT();
+ MVT ResVT = Op.getSimpleValueType();
assert((ResVT.is256BitVector() ||
ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
@@ -6073,10 +6175,14 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
+ // There is no blend with immediate in AVX-512.
+ if (VT.is512BitVector())
+ return SDValue();
+
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
return SDValue();
if (!Subtarget->hasInt256() && VT == MVT::v16i16)
@@ -6382,10 +6488,10 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
// 1. [ssse3] 1 x pshufb
// 2. [ssse3] 2 x pshufb + 1 x por
// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
-static
-SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- const X86TargetLowering &TLI) {
+static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+ const X86Subtarget* Subtarget,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
@@ -6401,7 +6507,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
// present, fall back to case 3.
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If all result elements are from one input vector, then only translate
@@ -6514,7 +6620,7 @@ static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
@@ -6562,7 +6668,7 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
SDLoc dl(SVOp);
unsigned NumElems = VT.getVectorNumElements();
MVT NewVT;
@@ -6599,7 +6705,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(MVT VT, EVT OpVT,
+static SDValue getVZextMovL(MVT VT, MVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, SDLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -6641,7 +6747,7 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
if (NewOp.getNode())
return NewOp;
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
@@ -6753,7 +6859,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
assert(VT.is128BitVector() && "Unsupported vector size");
@@ -6904,7 +7010,7 @@ static bool MayFoldVectorLoad(SDValue V) {
static
SDValue getMOVDDup(SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
// Canonicalize to v2f64.
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
@@ -6918,7 +7024,7 @@ SDValue getMOVLowToHigh(SDValue &Op, SDLoc &dl, SelectionDAG &DAG,
bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert(VT != MVT::v2i64 && "unsupported shuffle type");
@@ -6936,7 +7042,7 @@ static
SDValue getMOVHighToLow(SDValue &Op, SDLoc &dl, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
"unsupported shuffle type");
@@ -6952,7 +7058,7 @@ static
SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
unsigned NumElems = VT.getVectorNumElements();
// Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
@@ -7006,13 +7112,13 @@ SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
}
// Reduce a vector shuffle to zext.
-SDValue
-X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
// PMOVZX is only available from SSE41.
if (!Subtarget->hasSSE41())
return SDValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
// Only AVX2 supports 256-bit vector integer extending.
if (!Subtarget->hasInt256() && VT.is256BitVector())
@@ -7051,12 +7157,11 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
- LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*Context, NBits);
- EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
+ MVT NeVT = MVT::getIntegerVT(NBits);
+ MVT NVT = MVT::getVectorVT(NeVT, NumElems >> Shift);
- if (!isTypeLegal(NVT))
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
return SDValue();
// Simplify the operand as it's prepared to be fed into shuffle.
@@ -7064,8 +7169,8 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
if (V1.getOpcode() == ISD::BITCAST &&
V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- V1.getOperand(0)
- .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+ V1.getOperand(0).getOperand(0)
+ .getSimpleValueType().getSizeInBits() == SignificantBits) {
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
ConstantSDNode *CIdx =
@@ -7074,19 +7179,19 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// selection to fold it. Otherwise, we will short the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
(!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
- if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ MVT FullVT = V.getSimpleValueType();
+ MVT V1VT = V1.getSimpleValueType();
+ if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) {
// The "ext_vec_elt" node is wider than the result node.
// In this case we should extract subvector from V.
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
- unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
- EVT FullVT = V.getValueType();
- EVT SubVecVT = EVT::getVectorVT(*Context,
- FullVT.getVectorElementType(),
+ unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits();
+ MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(),
FullVT.getVectorNumElements()/Ratio);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
DAG.getIntPtrConstant(0));
}
- V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V);
}
}
@@ -7094,10 +7199,11 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
}
-SDValue
-X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
+static SDValue
+NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -7108,13 +7214,13 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// Handle splat operations
if (SVOp->isSplat()) {
// Use vbroadcast whenever the splat comes from a foldable load
- SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
if (Broadcast.getNode())
return Broadcast;
}
// Check integer expanding shuffles.
- SDValue NewOp = LowerVectorIntExtend(Op, DAG);
+ SDValue NewOp = LowerVectorIntExtend(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
@@ -7132,7 +7238,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- MVT NewVT = NewOp.getValueType().getSimpleVT();
+ MVT NewVT = NewOp.getSimpleValueType();
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
NewVT, true, false))
return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
@@ -7141,7 +7247,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- MVT NewVT = NewOp.getValueType().getSimpleVT();
+ MVT NewVT = NewOp.getSimpleValueType();
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
DAG, Subtarget, dl);
@@ -7156,7 +7262,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
unsigned NumElems = VT.getVectorNumElements();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
@@ -7194,7 +7300,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Normalize the input vectors. Here splats, zeroed vectors, profitable
// narrowing and commutation of operands should be handled. The actual code
// doesn't include all of those, work in progress...
- SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
+ SDValue NewOp = NormalizeVectorShuffle(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
@@ -7354,7 +7460,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
@@ -7377,7 +7483,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT, HasFp256))
+ if (isSHUFPMask(M, VT))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
@@ -7396,8 +7502,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasFp256)) {
- if (HasInt256 && VT == MVT::v8i32)
+ if (isVPERMILPMask(M, VT)) {
+ if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
@@ -7413,21 +7519,28 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (BlendOp.getNode())
return BlendOp;
- if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
- SmallVector<SDValue, 8> permclMask;
- for (unsigned i = 0; i != 8; ++i) {
- permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
+ unsigned Imm8;
+ if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))
+ return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);
+
+ if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) ||
+ VT.is512BitVector()) {
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
+ MVT MaskVectorVT = MVT::getVectorVT(MaskEltVT, NumElems);
+ SmallVector<SDValue, 16> permclMask;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
}
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32,
- &permclMask[0], 8);
- // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
- return DAG.getNode(X86ISD::VPERMV, dl, VT,
- DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
- }
- if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64))
- return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
- getShuffleCLImmediate(SVOp), DAG);
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT,
+ &permclMask[0], NumElems);
+ if (V2IsUndef)
+ // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
+ return DAG.getNode(X86ISD::VPERMV, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
+ return DAG.getNode(X86ISD::VPERMV3, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1, V2);
+ }
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
@@ -7443,7 +7556,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (VT == MVT::v16i8) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
@@ -7467,10 +7580,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector())
+ if (!Op.getOperand(0).getSimpleValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -7532,21 +7645,38 @@ SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- return SDValue();
-
SDValue Vec = Op.getOperand(0);
- MVT VecVT = Vec.getValueType().getSimpleVT();
+ MVT VecVT = Vec.getSimpleValueType();
+ SDValue Idx = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Idx)) {
+ if (VecVT.is512BitVector() ||
+ (VecVT.is256BitVector() && Subtarget->hasInt256() &&
+ VecVT.getVectorElementType().getSizeInBits() == 32)) {
+
+ MVT MaskEltVT =
+ MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits());
+ MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
+ MaskEltVT.getSizeInBits());
+
+ Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
+ SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
+ getZeroVector(MaskVT, Subtarget, DAG, dl),
+ Idx, DAG.getConstant(0, getPointerTy()));
+ SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(),
+ Perm, DAG.getConstant(0, getPointerTy()));
+ }
+ return SDValue();
+ }
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
- SDValue Idx = Op.getOperand(1);
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Get the 128-bit vector.
Vec = Extract128BitVector(Vec, IdxVal, DAG, dl);
- EVT EltVT = VecVT.getVectorElementType();
+ MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
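// A minimal standalone sketch (not LLVM code) of what the new variable-index
// EXTRACT_VECTOR_ELT path above computes: the index is inserted into lane 0 of
// an otherwise-zero mask vector, VPERMV pulls the selected element into lane 0,
// and a constant lane-0 extract finishes the job.  The scalar model and names
// below are illustrative only.
#include <array>
#include <cstddef>
template <std::size_t N>
int extractVariableIndex(const std::array<int, N> &Vec, unsigned Idx) {
  std::array<unsigned, N> Mask{};          // zero vector ...
  Mask[0] = Idx;                           // ... with Idx placed in lane 0 (VINSERT)
  std::array<int, N> Perm{};
  for (std::size_t i = 0; i != N; ++i)     // VPERMV: Perm[i] = Vec[Mask[i]]
    Perm[i] = Vec[Mask[i] % N];
  return Perm[0];                          // EXTRACT_VECTOR_ELT of lane 0
}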
@@ -7565,7 +7695,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return Res;
}
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
SDValue Vec = Op.getOperand(0);
@@ -7592,7 +7722,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
- MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
+ MVT VVT = Op.getOperand(0).getSimpleValueType();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7611,7 +7741,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
- MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
+ MVT VVT = Op.getOperand(0).getSimpleValueType();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7622,7 +7752,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
}
static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc dl(Op);
@@ -7676,7 +7806,7 @@ static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc dl(Op);
@@ -7724,17 +7854,15 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
- MVT OpVT = Op.getValueType().getSimpleVT();
+ MVT OpVT = Op.getSimpleValueType();
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
unsigned SizeFactor = OpVT.getSizeInBits()/128;
- EVT VT128 = EVT::getVectorVT(*Context,
- OpVT.getVectorElementType(),
+ MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
@@ -7762,8 +7890,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SDValue In = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- EVT ResVT = Op.getValueType();
- EVT InVT = In.getValueType();
+ MVT ResVT = Op.getSimpleValueType();
+ MVT InVT = In.getSimpleValueType();
if (Subtarget->hasFp256()) {
if (ResVT.is128BitVector() &&
@@ -7790,16 +7918,16 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SDValue SubVec = Op.getNode()->getOperand(1);
SDValue Idx = Op.getNode()->getOperand(2);
- if ((Op.getNode()->getValueType(0).is256BitVector() ||
- Op.getNode()->getValueType(0).is512BitVector()) &&
- SubVec.getNode()->getValueType(0).is128BitVector() &&
+ if ((Op.getNode()->getSimpleValueType(0).is256BitVector() ||
+ Op.getNode()->getSimpleValueType(0).is512BitVector()) &&
+ SubVec.getNode()->getSimpleValueType(0).is128BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
}
- if (Op.getNode()->getValueType(0).is512BitVector() &&
- SubVec.getNode()->getValueType(0).is256BitVector() &&
+ if (Op.getNode()->getSimpleValueType(0).is512BitVector() &&
+ SubVec.getNode()->getSimpleValueType(0).is256BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
@@ -8108,10 +8236,9 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
- SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- DAG.getIntPtrConstant(0),
- MachinePointerInfo(Ptr),
- false, false, false, 0);
+ SDValue ThreadPointer =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
@@ -8133,21 +8260,20 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x@ntpoff,%eax" (local exec)
// or "addl x@indntpoff,%eax" (initial exec)
// or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(), OperandFlags);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
if (isPIC && !is64Bit) {
Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, false,
- 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -8305,6 +8431,11 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
+ // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
+ // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away
+ // during isel.
+ SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits - 1, MVT::i8));
SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, MVT::i8))
: DAG.getConstant(0, VT);
@@ -8312,12 +8443,15 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
- Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
+ // If the shift amount is larger or equal than the width of a part we can't
+ // rely on the results of shld/shrd. Insert a test and select the appropriate
+ // values for large shift amounts.
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
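// A minimal standalone sketch (not LLVM code) of the double-shift lowering this
// hunk hardens.  SHLD/SHRD only look at the shift amount modulo the part width,
// so the plain SHL of the low part uses the same masked amount (SafeShAmt), and
// a final select on the (Amt & VTBits) bit handles amounts that span a whole
// part.  Shown for 32-bit parts; names are illustrative.
#include <cstdint>
#include <utility>
std::pair<uint32_t, uint32_t> shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  unsigned Safe = Amt & 31;                                        // SafeShAmt
  uint32_t Tmp2 = Safe ? (Hi << Safe) | (Lo >> (32 - Safe)) : Hi;  // SHLD(Hi, Lo, Amt)
  uint32_t Tmp3 = Lo << Safe;                                      // SHL(Lo, SafeShAmt)
  if (Amt & 32)              // amount >= part width: high part takes the shifted low part
    return {0u, Tmp3};       // {lo, hi}
  return {Tmp3, Tmp2};       // {lo, hi}
}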
@@ -8750,9 +8884,9 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
- MVT VT = Op->getValueType(0).getSimpleVT();
+ MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
- MVT InVT = In.getValueType().getSimpleVT();
+ MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
// Optimize vectors in AVX mode:
@@ -8768,7 +8902,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
// Concat upper and lower parts.
//
- if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
+ if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) &&
+ ((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
return SDValue();
@@ -8790,8 +8925,39 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
-SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT VT = Op->getValueType(0).getSimpleVT();
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getValueType().getSimpleVT();
+ SDLoc DL(Op);
+ unsigned int NumElts = VT.getVectorNumElements();
+ if (NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+ EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Now we have only mask extension
+ assert(InVT.getVectorElementType() == MVT::i1);
+ SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
+ const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+
+ SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
+ if (VT.is512BitVector())
+ return Brcst;
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
+}
+
+static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
if (Res.getNode())
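// A minimal standalone sketch (not LLVM code) of the per-lane effect of the
// VBROADCASTM-based LowerZERO_EXTEND_AVX512 path added above: the constant 1
// loaded from the constant pool is broadcast under the k-mask, so each wide
// lane becomes 1 where the mask bit is set and 0 elsewhere, with a VTRUNC back
// down when the result type is narrower than 512 bits.
#include <cstdint>
uint64_t zextMaskLane(bool MaskBit) {
  const uint64_t BroadcastVal = 1;       // scalar loaded from the constant pool
  return MaskBit ? BroadcastVal : 0;     // VBROADCASTM lane result
}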
@@ -8800,12 +8966,16 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
return SDValue();
}
-SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+
+static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
- MVT SVT = In.getValueType().getSimpleVT();
+ MVT SVT = In.getSimpleValueType();
+
+ if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
+ return LowerZERO_EXTEND_AVX512(Op, DAG);
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
@@ -8813,33 +8983,44 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
return Res;
}
- if (!VT.is256BitVector() || !SVT.is128BitVector() ||
- VT.getVectorNumElements() != SVT.getVectorNumElements())
- return SDValue();
-
- assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
-
- // AVX2 has better support of integer extending.
- if (Subtarget->hasInt256())
- return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
-
- SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
- static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
- SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
- DAG.getVectorShuffle(MVT::v8i16, DL, In,
- DAG.getUNDEF(MVT::v8i16),
- &Mask[0]));
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
+ assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements());
+ return SDValue();
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
- MVT SVT = In.getValueType().getSimpleVT();
-
- if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+ MVT InVT = In.getSimpleValueType();
+ assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
+ "Invalid TRUNCATE operation");
+
+ if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
+ if (VT.getVectorElementType().getSizeInBits() >=8)
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+
+ assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
+ if (InVT.getSizeInBits() < 512) {
+ MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
+ In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+ InVT = ExtVT;
+ }
+ SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
+ const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
+ SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
+ return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
+ }
+
+ if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
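// A minimal standalone sketch (not LLVM code) of the per-element semantics of
// the new truncate-to-i1 path in LowerTRUNCATE above: only bit 0 of each
// element survives, which the lowering realizes as AND with a broadcast 1
// followed by TESTM (the mask bit is set when the AND result is nonzero).
#include <cstdint>
bool truncElementToI1(uint64_t Elt) {
  uint64_t And = Elt & 1;     // AND with the broadcast constant 1
  return And != 0;            // TESTM: mask bit = (And != 0)
}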
@@ -8870,7 +9051,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
}
- if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+ if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomed PSHUFB.
if (Subtarget->hasInt256()) {
In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
@@ -8928,11 +9109,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
// Handle truncation of V256 to V128 using shuffles.
- if (!VT.is128BitVector() || !SVT.is256BitVector())
+ if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
- assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
- "Invalid op");
assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
@@ -8952,7 +9131,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
if (VT == MVT::v8i16)
return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT,
@@ -8996,9 +9175,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
- MVT SVT = In.getValueType().getSimpleVT();
+ MVT SVT = In.getSimpleValueType();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
@@ -9010,7 +9189,7 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
@@ -9044,7 +9223,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
@@ -9065,7 +9244,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
if (VT.isVector()) {
- MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, XORVT,
DAG.getNode(ISD::BITCAST, dl, XORVT,
@@ -9081,8 +9260,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
- MVT SrcVT = Op1.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
+ MVT SrcVT = Op1.getSimpleValueType();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -9158,7 +9337,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
// Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
@@ -9168,8 +9347,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
//
-SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget->hasSSE41())
@@ -9294,7 +9473,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
unsigned NumOperands = 0;
// Truncate operations may prevent the merge of the SETCC instruction
- // and the arithmetic intruction before it. Attempt to truncate the operands
+ // and the arithmetic instruction before it. Attempt to truncate the operands
// of the arithmetic instruction and use a reduced bit-width instruction.
bool NeedTruncation = false;
SDValue ArithOp = Op;
@@ -9402,7 +9581,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: {
if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
- SDValue EFLAGS = LowerVectorAllZeroTest(Op, DAG);
+ SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG);
if (EFLAGS.getNode())
return EFLAGS;
}
@@ -9638,7 +9817,7 @@ static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
// ones, and then concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
@@ -9665,25 +9844,62 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
+static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ MVT VT = Op.getSimpleValueType();
+
+ assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
+ Op.getValueType().getScalarType() == MVT::i1 &&
+ "Cannot set masked compare for this operation");
+
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ SDLoc dl(Op);
+
+ bool Unsigned = false;
+ unsigned SSECC;
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETUGT: Unsigned = true;
+ case ISD::SETGT: SSECC = 6; break; // NLE
+ case ISD::SETULT: Unsigned = true;
+ case ISD::SETLT: SSECC = 1; break;
+ case ISD::SETUGE: Unsigned = true;
+ case ISD::SETGE: SSECC = 5; break; // NLT
+ case ISD::SETULE: Unsigned = true;
+ case ISD::SETLE: SSECC = 2; break;
+ }
+ unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
+ return DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getConstant(SSECC, MVT::i8));
+
+}
+
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint();
+ bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
if (isFP) {
#ifndef NDEBUG
- MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT();
+ MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
-
+ unsigned Opc = X86ISD::CMPP;
+ if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) {
+ assert(VT.getVectorNumElements() <= 16);
+ Opc = X86ISD::CMPM;
+ }
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
unsigned CC0, CC1;
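// A minimal standalone sketch (not LLVM code) of the predicate mapping used by
// the LowerIntVSETCC_AVX512 switch above.  The immediates follow the VPCMP
// encoding (0=EQ, 1=LT, 2=LE, 4=NE, 5=NLT, 6=NLE); unsigned predicates pick the
// CMPMU form, signed ones CMPM.  The enum is only a stand-in for ISD::CondCode.
enum Cond { SETEQ, SETNE, SETGT, SETUGT, SETLT, SETULT, SETGE, SETUGE, SETLE, SETULE };
struct MaskedCmp { unsigned Imm; bool IsUnsigned; };
MaskedCmp mapToVPCMP(Cond C) {
  switch (C) {
  case SETEQ:  return {0, false};
  case SETNE:  return {4, false};
  case SETGT:  return {6, false};   case SETUGT: return {6, true};
  case SETLT:  return {1, false};   case SETULT: return {1, true};
  case SETGE:  return {5, false};   case SETUGE: return {5, true};
  case SETLE:  return {2, false};   case SETULE: return {2, true};
  }
  return {0, false};   // unreachable for the enumerated values
}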
@@ -9695,14 +9911,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
- SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC0, MVT::i8));
- SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC1, MVT::i8));
return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// Handle all other FP comparisons here.
- return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, MVT::i8));
}
@@ -9710,6 +9926,24 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
+ bool MaskResult = (VT.getVectorElementType() == MVT::i1);
+ EVT OpVT = Op1.getValueType();
+ if (Subtarget->hasAVX512()) {
+ if (Op1.getValueType().is512BitVector() ||
+ (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
+ return LowerIntVSETCC_AVX512(Op, DAG);
+
+    // In the AVX-512 architecture setcc returns a mask with i1 elements,
+    // but there is no compare instruction for i8 and i16 elements.
+    // We are not talking about 512-bit operands in this case; these
+    // types are illegal.
+ if (MaskResult &&
+ (OpVT.getVectorElementType().getSizeInBits() < 32 &&
+ OpVT.getVectorElementType().getSizeInBits() >= 8))
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
+ }
+
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
@@ -9719,15 +9953,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
- case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
+ case ISD::SETEQ: Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
- case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
+ case ISD::SETGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break;
case ISD::SETGE: Swap = true;
- case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break;
+ case ISD::SETLE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ Invert = true; break;
case ISD::SETULT: Swap = true;
- case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break;
+ case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
- case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break;
+ case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ FlipSigns = true; Invert = true; break;
}
// Special case: Use min/max operations for SETULE/SETUGE
@@ -9841,7 +10078,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
@@ -9885,7 +10122,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
}
- bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint();
+ bool isFP = Op1.getSimpleValueType().isFloatingPoint();
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
@@ -10040,7 +10277,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
@@ -10149,15 +10386,50 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
-SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
- MVT VT = Op->getValueType(0).getSimpleVT();
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
- MVT InVT = In.getValueType().getSimpleVT();
+ MVT InVT = In.getSimpleValueType();
+ SDLoc dl(Op);
+
+ unsigned int NumElts = VT.getVectorNumElements();
+ if (NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+
+ MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
+ Constant *C = ConstantInt::get(*DAG.getContext(),
+ APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
+
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
+ if (VT.is512BitVector())
+ return Brcst;
+ return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
+}
+
+static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op->getSimpleValueType(0);
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
+ if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
+ return LowerSIGN_EXTEND_AVX512(Op, DAG);
+
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
- (VT != MVT::v8i32 || InVT != MVT::v8i16))
+ (VT != MVT::v8i32 || InVT != MVT::v8i16) &&
+ (VT != MVT::v16i16 || InVT != MVT::v16i8))
return SDValue();
if (Subtarget->hasInt256())
@@ -10502,7 +10774,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- // FIXME: Ensure alignment here
+ unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
@@ -10540,14 +10813,20 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- Flag = Chain.getValue(1);
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
- Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- SPTy).getValue(1);
+ unsigned SPReg = RegInfo->getStackRegister();
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
+ Chain = SP.getValue(1);
- SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ if (Align) {
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
+ }
+
+ SDValue Ops1[2] = { SP, Chain };
return DAG.getMergeValues(Ops1, 2, dl);
}
}
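// A minimal standalone sketch (not LLVM code) of the align-down step this hunk
// adds after the WIN_ALLOCA call: for a power-of-two Align, ANDing the stack
// pointer with -Align rounds it down to the requested alignment before it is
// copied back into the stack register and returned.
#include <cstdint>
uint64_t alignStackPointerDown(uint64_t SP, uint64_t Align) {
  return SP & ~(Align - 1);   // ~(Align - 1) == -(uint64_t)Align, matching the AND constant
}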
@@ -10698,6 +10977,26 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// getTargetVShiftByConstNode - Handle vector element shifts where the shift
+// amount is a constant. Takes immediate version of shift as input.
+static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, EVT VT,
+ SDValue SrcOp, uint64_t ShiftAmt,
+ SelectionDAG &DAG) {
+
+ // Check for ShiftAmt >= element width
+ if (ShiftAmt >= VT.getVectorElementType().getSizeInBits()) {
+ if (Opc == X86ISD::VSRAI)
+ ShiftAmt = VT.getVectorElementType().getSizeInBits() - 1;
+ else
+ return DAG.getConstant(0, VT);
+ }
+
+ assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
+ && "Unknown target vector shift-by-constant node");
+
+ return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
+}
+
// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT,
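// A minimal standalone sketch (not LLVM code) of the per-element folding rule
// in the new getTargetVShiftByConstNode helper when the immediate is out of
// range for the element width: arithmetic right shifts saturate the amount at
// width-1 (preserving the sign fill), while logical shifts fold to zero.
#include <cstdint>
int32_t foldSrai32(int32_t X, uint64_t Amt) {
  if (Amt >= 32) Amt = 31;        // VSRAI: clamp to element width - 1
  return X >> Amt;                // arithmetic shift keeps the sign bits
}
uint32_t foldSrli32(uint32_t X, uint64_t Amt) {
  if (Amt >= 32) return 0;        // VSRLI / VSHLI: whole result becomes 0
  return X >> Amt;
}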
@@ -10705,18 +11004,10 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT,
SelectionDAG &DAG) {
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
- if (isa<ConstantSDNode>(ShAmt)) {
- // Constant may be a TargetConstant. Use a regular constant.
- uint32_t ShiftAmt = cast<ConstantSDNode>(ShAmt)->getZExtValue();
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI:
- case X86ISD::VSRLI:
- case X86ISD::VSRAI:
- return DAG.getNode(Opc, dl, VT, SrcOp,
- DAG.getConstant(ShiftAmt, MVT::i32));
- }
- }
+ // Catch shift-by-constant.
+ if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
+ return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
+ CShAmt->getZExtValue(), DAG);
// Change opcode to non-immediate version
switch (Opc) {
@@ -10919,24 +11210,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d: {
+ case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -10946,6 +11245,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
Opcode = X86ISD::UMAX;
break;
case Intrinsic::x86_sse2_pminu_b:
@@ -10954,6 +11255,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
Opcode = X86ISD::UMIN;
break;
case Intrinsic::x86_sse41_pmaxsb:
@@ -10962,6 +11265,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
Opcode = X86ISD::SMAX;
break;
case Intrinsic::x86_sse41_pminsb:
@@ -10970,6 +11275,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q:
Opcode = X86ISD::SMIN;
break;
}
@@ -10982,10 +11289,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_avx512_max_ps_512:
+ case Intrinsic::x86_avx512_max_pd_512:
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
- case Intrinsic::x86_avx_min_pd_256: {
+ case Intrinsic::x86_avx_min_pd_256:
+ case Intrinsic::x86_avx512_min_ps_512:
+ case Intrinsic::x86_avx512_min_pd_512: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -10993,12 +11304,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_avx512_max_ps_512:
+ case Intrinsic::x86_avx512_max_pd_512:
Opcode = X86ISD::FMAX;
break;
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
+ case Intrinsic::x86_avx512_min_ps_512:
+ case Intrinsic::x86_avx512_min_pd_512:
Opcode = X86ISD::FMIN;
break;
}
@@ -11069,7 +11384,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
- // but second operand for node/intruction.
+ // but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
@@ -11144,6 +11459,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ case Intrinsic::x86_avx512_kortestz:
+ case Intrinsic::x86_avx512_kortestc: {
+ unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B;
+ SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
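For reference, the COND_E/COND_B choice above mirrors the flags KORTESTW is assumed to set: ZF when the OR of the two masks is all zeros, CF when it is all ones. A minimal standalone scalar sketch of that assumed behaviour (helper names are illustrative, not from the patch):

#include <cstdint>

// Scalar model of the assumed KORTESTW flag semantics on 16-bit masks.
// kortestz reads ZF (hence COND_E); kortestc reads CF (hence COND_B).
static bool KortestZ16(uint16_t A, uint16_t B) { return (A | B) == 0x0000; }
static bool KortestC16(uint16_t A, uint16_t B) { return (A | B) == 0xFFFF; }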
// SSE/AVX shift intrinsics
case Intrinsic::x86_sse2_psll_w:
@@ -11338,7 +11663,19 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_fma_vfmaddsub_ps_256:
case Intrinsic::x86_fma_vfmaddsub_pd_256:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
- case Intrinsic::x86_fma_vfmsubadd_pd_256: {
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ case Intrinsic::x86_fma_vfmadd_ps_512:
+ case Intrinsic::x86_fma_vfmadd_pd_512:
+ case Intrinsic::x86_fma_vfmsub_ps_512:
+ case Intrinsic::x86_fma_vfmsub_pd_512:
+ case Intrinsic::x86_fma_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_vfnmadd_pd_512:
+ case Intrinsic::x86_fma_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_vfnmsub_pd_512:
+ case Intrinsic::x86_fma_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_vfmaddsub_pd_512:
+ case Intrinsic::x86_fma_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_vfmsubadd_pd_512: {
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -11346,36 +11683,48 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_fma_vfmadd_pd:
case Intrinsic::x86_fma_vfmadd_ps_256:
case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_vfmadd_ps_512:
+ case Intrinsic::x86_fma_vfmadd_pd_512:
Opc = X86ISD::FMADD;
break;
case Intrinsic::x86_fma_vfmsub_ps:
case Intrinsic::x86_fma_vfmsub_pd:
case Intrinsic::x86_fma_vfmsub_ps_256:
case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_vfmsub_ps_512:
+ case Intrinsic::x86_fma_vfmsub_pd_512:
Opc = X86ISD::FMSUB;
break;
case Intrinsic::x86_fma_vfnmadd_ps:
case Intrinsic::x86_fma_vfnmadd_pd:
case Intrinsic::x86_fma_vfnmadd_ps_256:
case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_vfnmadd_pd_512:
Opc = X86ISD::FNMADD;
break;
case Intrinsic::x86_fma_vfnmsub_ps:
case Intrinsic::x86_fma_vfnmsub_pd:
case Intrinsic::x86_fma_vfnmsub_ps_256:
case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_vfnmsub_pd_512:
Opc = X86ISD::FNMSUB;
break;
case Intrinsic::x86_fma_vfmaddsub_ps:
case Intrinsic::x86_fma_vfmaddsub_pd:
case Intrinsic::x86_fma_vfmaddsub_ps_256:
case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_vfmaddsub_pd_512:
Opc = X86ISD::FMADDSUB;
break;
case Intrinsic::x86_fma_vfmsubadd_ps:
case Intrinsic::x86_fma_vfmsubadd_pd:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ case Intrinsic::x86_fma_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_vfmsubadd_pd_512:
Opc = X86ISD::FMSUBADD;
break;
}
@@ -11386,7 +11735,87 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
}
-static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
+static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Base, SDValue Index,
+ SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget * Subtarget) {
+ SDLoc dl(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ assert(C && "Invalid scale type");
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Index.getValueType().getVectorNumElements());
+ SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+ SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
+ return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+}
+
+static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget * Subtarget) {
+ SDLoc dl(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ assert(C && "Invalid scale type");
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Index.getValueType().getVectorNumElements());
+ SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+ SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ if (Src.getOpcode() == ISD::UNDEF)
+ Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+ SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
+ return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+}
+
+static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Base, SDValue Index,
+ SDValue ScaleOp, SDValue Chain) {
+ SDLoc dl(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ assert(C && "Invalid scale type");
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Index.getValueType().getVectorNumElements());
+ SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
+ SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ return SDValue(Res, 1);
+}
+
+static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain) {
+ SDLoc dl(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ assert(C && "Invalid scale type");
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Index.getValueType().getVectorNumElements());
+ SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ return SDValue(Res, 1);
+}
+
+static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntNo) {
@@ -11421,7 +11850,144 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
-
+  // int_gather(index, base, scale);
+ case Intrinsic::x86_avx512_gather_qpd_512:
+ case Intrinsic::x86_avx512_gather_qps_512:
+ case Intrinsic::x86_avx512_gather_dpd_512:
+ case Intrinsic::x86_avx512_gather_qpi_512:
+ case Intrinsic::x86_avx512_gather_qpq_512:
+ case Intrinsic::x86_avx512_gather_dpq_512:
+ case Intrinsic::x86_avx512_gather_dps_512:
+ case Intrinsic::x86_avx512_gather_dpi_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break;
+ case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break;
+ case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Index = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
+ SDValue Scale = Op.getOperand(4);
+ return getGatherNode(Opc, Op, DAG, Base, Index, Scale, Chain, Subtarget);
+ }
+  // int_gather_mask(v1, mask, index, base, scale);
+ case Intrinsic::x86_avx512_gather_qps_mask_512:
+ case Intrinsic::x86_avx512_gather_qpd_mask_512:
+ case Intrinsic::x86_avx512_gather_dpd_mask_512:
+ case Intrinsic::x86_avx512_gather_dps_mask_512:
+ case Intrinsic::x86_avx512_gather_qpi_mask_512:
+ case Intrinsic::x86_avx512_gather_qpq_mask_512:
+ case Intrinsic::x86_avx512_gather_dpi_mask_512:
+ case Intrinsic::x86_avx512_gather_dpq_mask_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_gather_qps_mask_512:
+ Opc = X86::VGATHERQPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpd_mask_512:
+ Opc = X86::VGATHERQPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpd_mask_512:
+ Opc = X86::VGATHERDPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dps_mask_512:
+ Opc = X86::VGATHERDPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpi_mask_512:
+ Opc = X86::VPGATHERQDZrm; break;
+ case Intrinsic::x86_avx512_gather_qpq_mask_512:
+ Opc = X86::VPGATHERQQZrm; break;
+ case Intrinsic::x86_avx512_gather_dpi_mask_512:
+ Opc = X86::VPGATHERDDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpq_mask_512:
+ Opc = X86::VPGATHERDQZrm; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Src = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue Index = Op.getOperand(4);
+ SDValue Base = Op.getOperand(5);
+ SDValue Scale = Op.getOperand(6);
+ return getMGatherNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
+ Subtarget);
+ }
+  // int_scatter(base, index, v1, scale);
+ case Intrinsic::x86_avx512_scatter_qpd_512:
+ case Intrinsic::x86_avx512_scatter_qps_512:
+ case Intrinsic::x86_avx512_scatter_dpd_512:
+ case Intrinsic::x86_avx512_scatter_qpi_512:
+ case Intrinsic::x86_avx512_scatter_qpq_512:
+ case Intrinsic::x86_avx512_scatter_dpq_512:
+ case Intrinsic::x86_avx512_scatter_dps_512:
+ case Intrinsic::x86_avx512_scatter_dpi_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_scatter_qpd_512:
+ Opc = X86::VSCATTERQPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qps_512:
+ Opc = X86::VSCATTERQPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpd_512:
+ Opc = X86::VSCATTERDPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_dps_512:
+ Opc = X86::VSCATTERDPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpi_512:
+ Opc = X86::VPSCATTERQDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpq_512:
+ Opc = X86::VPSCATTERQQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpq_512:
+ Opc = X86::VPSCATTERDQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpi_512:
+ Opc = X86::VPSCATTERDDZmr; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Base = Op.getOperand(2);
+ SDValue Index = Op.getOperand(3);
+ SDValue Src = Op.getOperand(4);
+ SDValue Scale = Op.getOperand(5);
+ return getScatterNode(Opc, Op, DAG, Src, Base, Index, Scale, Chain);
+ }
+  // int_scatter_mask(base, mask, index, v1, scale);
+ case Intrinsic::x86_avx512_scatter_qps_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpd_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpd_mask_512:
+ case Intrinsic::x86_avx512_scatter_dps_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpi_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpq_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpi_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpq_mask_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_scatter_qpd_mask_512:
+ Opc = X86::VSCATTERQPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qps_mask_512:
+ Opc = X86::VSCATTERQPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpd_mask_512:
+ Opc = X86::VSCATTERDPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_dps_mask_512:
+ Opc = X86::VSCATTERDPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpi_mask_512:
+ Opc = X86::VPSCATTERQDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpq_mask_512:
+ Opc = X86::VPSCATTERQQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpq_mask_512:
+ Opc = X86::VPSCATTERDQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpi_mask_512:
+ Opc = X86::VPSCATTERDDZmr; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Base = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue Index = Op.getOperand(4);
+ SDValue Src = Op.getOperand(5);
+ SDValue Scale = Op.getOperand(6);
+ return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
+ }
// XTEST intrinsics.
case Intrinsic::x86_xtest: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
@@ -11913,8 +12479,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
- assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
- "Only know how to lower V2I64/V4I64 multiply");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
+ "Only know how to lower V2I64/V4I64/V8I64 multiply");
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
@@ -11927,13 +12493,12 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
- SDValue ShAmt = DAG.getConstant(32, MVT::i32);
-
- SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
- SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt);
+ SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
+ SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
// Bit cast to 32-bit vectors for MULUDQ
- EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : MVT::v8i32;
+ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
+ (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
@@ -11943,14 +12508,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
- AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt);
- AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt);
+ AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+ AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
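For reference, the per-element identity behind the PMULUDQ sequence above (a 64-bit product assembled from 32-bit partial products, with the Ahi*Bhi term dropped because it only affects bits >= 64) can be checked with a small standalone scalar sketch; the helper name is illustrative only:

#include <cassert>
#include <cstdint>

// 64x64->64 multiply built from 32-bit halves, mirroring AloBlo/AloBhi/AhiBlo.
static uint64_t Mul64Via32(uint64_t A, uint64_t B) {
  uint64_t Ahi = A >> 32, Bhi = B >> 32;                  // psrlqi(a/b, 32)
  uint64_t AloBlo = (uint64_t)(uint32_t)A * (uint32_t)B;  // pmuludq terms
  uint64_t AloBhi = (uint64_t)(uint32_t)A * (uint32_t)Bhi;
  uint64_t AhiBlo = (uint64_t)(uint32_t)Ahi * (uint32_t)B;
  return AloBlo + (AloBhi << 32) + (AhiBlo << 32);        // psllqi + adds
}

// Sanity check of the identity:
//   assert(Mul64Via32(0x1234567890ULL, 0xfedcba987ULL) ==
//          0x1234567890ULL * 0xfedcba987ULL);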
-SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT EltTy = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
@@ -11972,16 +12537,26 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
if ((SplatValue != 0) &&
(SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
- unsigned lg2 = SplatValue.countTrailingZeros();
+ unsigned Lg2 = SplatValue.countTrailingZeros();
// Splat the sign bit.
- SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
- SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ SmallVector<SDValue, 16> Sz(NumElts,
+ DAG.getConstant(EltTy.getSizeInBits() - 1,
+ EltTy));
+ SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0],
+ NumElts));
// Add (N0 < 0) ? abs2 - 1 : 0;
- SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
- SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SmallVector<SDValue, 16> Amt(NumElts,
+ DAG.getConstant(EltTy.getSizeInBits() - Lg2,
+ EltTy));
+ SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0],
+ NumElts));
SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
- SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
- SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+ SmallVector<SDValue, 16> Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy));
+ SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0],
+ NumElts));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
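The shift sequence above is the usual round-toward-zero signed divide by a power of two: add 2^K - 1 to negative dividends, then arithmetic-shift right by K. A standalone scalar sketch of the same steps (illustrative only; assumes 32-bit elements, 1 <= K <= 31, and an arithmetic right shift for signed values):

#include <cstdint>

// Signed divide by 2^K using only shifts and an add, rounding toward zero.
static int32_t SDivPow2(int32_t X, unsigned K) {
  int32_t Sgn = X >> 31;                     // splat the sign bit: 0 or -1
  uint32_t Bias = (uint32_t)Sgn >> (32 - K); // 2^K - 1 if X < 0, else 0
  return (X + (int32_t)Bias) >> K;           // arithmetic shift right by K
}
// A divisor of -2^K additionally requires negating the result, as the code
// above does.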
@@ -12010,23 +12585,26 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
- (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) ||
+ (Subtarget->hasAVX512() &&
+ (VT == MVT::v8i64 || VT == MVT::v16i32))) {
if (Op.getOpcode() == ISD::SHL)
- return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
+ DAG);
if (Op.getOpcode() == ISD::SRL)
- return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
+ DAG);
if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
- return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
+ DAG);
}
if (VT == MVT::v16i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
+ MVT::v8i16, R, ShiftAmt,
+ DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 16> V(16,
@@ -12037,8 +12615,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
+ MVT::v8i16, R, ShiftAmt,
+ DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 16> V(16,
@@ -12069,8 +12648,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
+ MVT::v16i16, R, ShiftAmt,
+ DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 32> V(32,
@@ -12081,8 +12661,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
+ MVT::v16i16, R, ShiftAmt,
+ DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 32> V(32,
@@ -12147,14 +12728,14 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
- return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
+ DAG);
case ISD::SRL:
- return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
+ DAG);
case ISD::SRA:
- return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
+ DAG);
}
}
@@ -12172,7 +12753,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
- VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ VT == MVT::v8i32 || VT == MVT::v16i16)) ||
+ (Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) {
SDValue BaseShAmt;
EVT EltVT = VT.getVectorElementType();
@@ -12240,6 +12822,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRA:
@@ -12249,6 +12833,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
case MVT::v8i16:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRL:
@@ -12260,6 +12846,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
}
}
@@ -12268,7 +12856,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
if (!Subtarget->is64Bit() &&
- (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) ||
+ (Subtarget->hasAVX512() && VT == MVT::v8i64)) &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
@@ -12297,7 +12886,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
+ SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -12316,6 +12906,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
if (V.getNode())
return V;
+ if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
+ return Op;
// AVX2 has VPSLLV/VPSRAV/VPSRLV.
if (Subtarget->hasInt256()) {
if (Op.getOpcode() == ISD::SRL &&
@@ -12356,8 +12948,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// r = VSELECT(r, psllw(r & (char16)15, 4), a);
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
- M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
- DAG.getConstant(4, MVT::i32), DAG);
+ M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
@@ -12368,8 +12959,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// r = VSELECT(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
- M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
- DAG.getConstant(2, MVT::i32), DAG);
+ M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
@@ -12512,7 +13102,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
- SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
@@ -12546,24 +13135,34 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- // (sext (vzext x)) -> (vsext x)
SDValue Op0 = Op.getOperand(0);
SDValue Op00 = Op0.getOperand(0);
SDValue Tmp1;
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
if (Op0.getOpcode() == ISD::BITCAST &&
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
- Tmp1 = LowerVectorIntExtend(Op00, DAG);
- if (Tmp1.getNode()) {
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
- "This optimization is invalid without a VZEXT.");
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ // (sext (vzext x)) -> (vsext x)
+ Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
+ if (Tmp1.getNode()) {
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ // This folding is only valid when the in-reg type is a vector of i8,
+ // i16, or i32.
+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
+ ExtraEltVT == MVT::i32) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+ Op0 = Tmp1;
+ }
}
// If the above didn't work, then just use Shift-Left + Shift-Right.
- Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
- return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
+ Tmp1 = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0, BitsDiff,
+ DAG);
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Tmp1, BitsDiff,
+ DAG);
}
}
}
@@ -12655,9 +13254,10 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
}
-SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
- EVT SrcVT = Op.getOperand(0).getValueType();
- EVT DstVT = Op.getValueType();
+static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT SrcVT = Op.getOperand(0).getSimpleValueType();
+ MVT DstVT = Op.getSimpleValueType();
assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
@@ -12742,7 +13342,8 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
-SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
@@ -12753,8 +13354,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- ArgListTy Args;
- ArgListEntry Entry;
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
@@ -12767,7 +13368,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());
Type *RetTy = isF64
? (Type*)StructType::get(ArgTy, ArgTy, NULL)
@@ -12778,7 +13380,7 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
                       CallingConv::C, /*isTailCall=*/false,
                       /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
if (isF64)
// Returned in xmm0 and xmm1.
@@ -12822,9 +13424,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
- case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
- case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
- case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
+ case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
+ case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
@@ -12840,7 +13442,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
@@ -12858,7 +13461,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
- case ISD::SHL: return LowerShift(Op, DAG);
+ case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -12866,7 +13469,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
- case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
@@ -12874,7 +13477,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
}
}
@@ -13128,6 +13731,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::CMPM: return "X86ISD::CMPM";
+ case X86ISD::CMPMU: return "X86ISD::CMPMU";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
@@ -13187,6 +13792,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
+ case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
+ case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
+ case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
@@ -13200,6 +13808,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CMPP: return "X86ISD::CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
+ case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";
+ case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
@@ -13214,9 +13824,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::BLSI: return "X86ISD::BLSI";
case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
case X86ISD::BLSR: return "X86ISD::BLSR";
+ case X86ISD::BZHI: return "X86ISD::BZHI";
+ case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::TESTM: return "X86ISD::TESTM";
+ case X86ISD::KORTEST: return "X86ISD::KORTEST";
+ case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
@@ -13239,6 +13854,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
+ case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
@@ -13422,37 +14038,46 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
+ if (!VT.isSimple())
+ return false;
+
+ MVT SVT = VT.getSimpleVT();
+
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
return false;
// FIXME: pshufb, blends, shifts.
- return (VT.getVectorNumElements() == 2 ||
+ return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
- isMOVLMask(M, VT) ||
- isSHUFPMask(M, VT, Subtarget->hasFp256()) ||
- isPSHUFDMask(M, VT) ||
- isPSHUFHWMask(M, VT, Subtarget->hasInt256()) ||
- isPSHUFLWMask(M, VT, Subtarget->hasInt256()) ||
- isPALIGNRMask(M, VT, Subtarget) ||
- isUNPCKLMask(M, VT, Subtarget->hasInt256()) ||
- isUNPCKHMask(M, VT, Subtarget->hasInt256()) ||
- isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasInt256()) ||
- isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasInt256()));
+ isMOVLMask(M, SVT) ||
+ isSHUFPMask(M, SVT) ||
+ isPSHUFDMask(M, SVT) ||
+ isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
+ isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
+ isPALIGNRMask(M, SVT, Subtarget) ||
+ isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
+ isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
+ isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
+ isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));
}
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const {
- unsigned NumElts = VT.getVectorNumElements();
+ if (!VT.isSimple())
+ return false;
+
+ MVT SVT = VT.getSimpleVT();
+ unsigned NumElts = SVT.getVectorNumElements();
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
return true;
- if (NumElts == 4 && VT.is128BitVector()) {
- return (isMOVLMask(Mask, VT) ||
- isCommutedMOVLMask(Mask, VT, true) ||
- isSHUFPMask(Mask, VT, Subtarget->hasFp256()) ||
- isSHUFPMask(Mask, VT, Subtarget->hasFp256(), /* Commuted */ true));
+ if (NumElts == 4 && SVT.is128BitVector()) {
+ return (isMOVLMask(Mask, SVT) ||
+ isCommutedMOVLMask(Mask, SVT, true) ||
+ isSHUFPMask(Mask, SVT) ||
+ isSHUFPMask(Mask, SVT, /* Commuted */ true));
}
return false;
}
@@ -15194,6 +15819,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_V8F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
+ case X86::CMOV_V16F32:
+ case X86::CMOV_V8F64:
+ case X86::CMOV_V8I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
@@ -15642,7 +16270,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
}
/// PerformTruncateCombine - Converts truncate operation to
@@ -15749,6 +16377,44 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
+/// Extract one bit from mask vector, like v16i1 or v8i1.
+/// AVX-512 feature.
+static SDValue ExtractBitFromMaskVector(SDNode *N, SelectionDAG &DAG) {
+ SDValue Vec = N->getOperand(0);
+ SDLoc dl(Vec);
+ MVT VecVT = Vec.getSimpleValueType();
+ SDValue Idx = N->getOperand(1);
+ MVT EltVT = N->getSimpleValueType(0);
+
+  assert((VecVT.getVectorElementType() == MVT::i1 && EltVT == MVT::i8) &&
+         "Unexpected operands in ExtractBitFromMaskVector");
+
+ // variable index
+ if (!isa<ConstantSDNode>(Idx)) {
+ MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
+ SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                              ExtVT.getVectorElementType(), Ext, Idx);
+ return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
+ }
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ MVT ScalarVT = MVT::getIntegerVT(VecVT.getSizeInBits());
+ unsigned MaxShift = VecVT.getSizeInBits() - 1;
+ Vec = DAG.getNode(ISD::BITCAST, dl, ScalarVT, Vec);
+ Vec = DAG.getNode(ISD::SHL, dl, ScalarVT, Vec,
+ DAG.getConstant(MaxShift - IdxVal, ScalarVT));
+ Vec = DAG.getNode(ISD::SRL, dl, ScalarVT, Vec,
+ DAG.getConstant(MaxShift, ScalarVT));
+
+ if (VecVT == MVT::v16i1) {
+ Vec = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Vec);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Vec);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i8, Vec);
+}
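For a constant index, the combine above reduces the element extract to two scalar shifts on the bit-cast mask; the same identity in standalone scalar form (illustrative only, shown for a 16-bit mask):

#include <cstdint>

// Extract bit Idx of a 16-bit mask value: shift it up to the MSB, then back
// down, leaving the requested bit in the least significant position.
static uint16_t ExtractMaskBit(uint16_t Mask, unsigned Idx) {
  const unsigned MaxShift = 15;                       // size in bits - 1
  uint16_t Up = (uint16_t)(Mask << (MaxShift - Idx)); // bit Idx now in the MSB
  return (uint16_t)(Up >> MaxShift);                  // result is 0 or 1
}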
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
@@ -15759,6 +16425,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return NewOp;
SDValue InputVector = N->getOperand(0);
+
+ if (InputVector.getValueType().getVectorElementType() == MVT::i1 &&
+ !DCI.isBeforeLegalize())
+ return ExtractBitFromMaskVector(N, DAG);
+
// Detect whether we are trying to convert from mmx to i32 and the bitcast
// from mmx to v2i32 has a single usage.
if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
@@ -15846,24 +16517,28 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
}
/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
-static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
- SDValue RHS, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+static std::pair<unsigned, bool>
+matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const X86Subtarget *Subtarget) {
if (!VT.isVector())
- return 0;
+ return std::make_pair(0, false);
+ bool NeedSplit = false;
switch (VT.getSimpleVT().SimpleTy) {
- default: return 0;
+ default: return std::make_pair(0, false);
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
if (!Subtarget->hasAVX2())
- return 0;
+ NeedSplit = true;
+ if (!Subtarget->hasAVX())
+ return std::make_pair(0, false);
+ break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
if (!Subtarget->hasSSE2())
- return 0;
+ return std::make_pair(0, false);
}
// SSE2 has only a small subset of the operations.
@@ -15874,6 +16549,7 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ unsigned Opc = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
@@ -15881,16 +16557,16 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- return hasUnsigned ? X86ISD::UMIN : 0;
+ Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- return hasUnsigned ? X86ISD::UMAX : 0;
+ Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- return hasSigned ? X86ISD::SMIN : 0;
+ Opc = hasSigned ? X86ISD::SMIN : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- return hasSigned ? X86ISD::SMAX : 0;
+ Opc = hasSigned ? X86ISD::SMAX : 0; break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
@@ -15899,20 +16575,20 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- return hasUnsigned ? X86ISD::UMAX : 0;
+ Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- return hasUnsigned ? X86ISD::UMIN : 0;
+ Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- return hasSigned ? X86ISD::SMAX : 0;
+ Opc = hasSigned ? X86ISD::SMAX : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- return hasSigned ? X86ISD::SMIN : 0;
+ Opc = hasSigned ? X86ISD::SMIN : 0; break;
}
}
- return 0;
+ return std::make_pair(Opc, NeedSplit);
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
@@ -15926,13 +16602,14 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
- VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ VT != MVT::f80 && TLI.isTypeLegal(VT) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -16071,6 +16748,22 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
+ EVT CondVT = Cond.getValueType();
+ if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
+ CondVT.getVectorElementType() == MVT::i1) {
+ // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
+ // lowering on AVX-512. In this case we convert it to
+      // v16i8 (select v16i8, v16i8, v16i8) and use the AVX instruction.
+      // The same applies to all 128- and 256-bit vectors of i8 and i16.
+ EVT OpVT = LHS.getValueType();
+ if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
+ (OpVT.getVectorElementType() == MVT::i8 ||
+ OpVT.getVectorElementType() == MVT::i16)) {
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
+ DCI.AddToWorklist(Cond.getNode());
+ return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
+ }
+ }
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
@@ -16195,9 +16888,12 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ // Early exit check
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
// Match VSELECTs into subs with unsigned saturation.
- if (!DCI.isBeforeLegalize() &&
- N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
(Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
@@ -16251,14 +16947,35 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
// Try to match a min/max vector operation.
- if (!DCI.isBeforeLegalize() &&
- N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
- if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
- return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
+    std::pair<unsigned, bool> Ret =
+        matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
+    unsigned Opc = Ret.first;
+    bool NeedSplit = Ret.second;
+
+ if (Opc && NeedSplit) {
+ unsigned NumElems = VT.getVectorNumElements();
+ // Extract the LHS vectors
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
+
+ // Extract the RHS vectors
+ SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
+ SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
+
+ // Create min/max for each subvector
+ LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
+ RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
+
+ // Merge the result
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
+ } else if (Opc)
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
// Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
- if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
- Cond.getOpcode() == ISD::SETCC) {
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // Check if SETCC has already been promoted
+ TLI.getSetCCResultType(*DAG.getContext(), VT) == Cond.getValueType()) {
assert(Cond.getValueType().isVector() &&
"vector select expects a vector selector!");
@@ -16305,7 +17022,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
// to simplify previous instructions.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
@@ -16314,6 +17030,15 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (BitWidth == 1)
return SDValue();
+  // Check all uses of the condition operand to see whether it will be
+  // consumed by non-BLEND instructions, which may depend on all bits being
+  // set properly.
+ for (SDNode::use_iterator I = Cond->use_begin(),
+ E = Cond->use_end(); I != E; ++I)
+ if (I->getOpcode() != ISD::VSELECT)
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ return SDValue();
+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
@@ -16990,33 +17715,80 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- // Create BLSI, and BLSR instructions
+ // Create BLSI, BLSR, and BZHI instructions
// BLSI is X & (-X)
// BLSR is X & (X-1)
- if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
+ // BZHI is X & ((1 << Y) - 1)
+ // BEXTR is ((X >> imm) & (2**size-1))
+ if (VT == MVT::i32 || VT == MVT::i64) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- // Check LHS for neg
- if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
- isZero(N0.getOperand(0)))
- return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
-
- // Check RHS for neg
- if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
- isZero(N1.getOperand(0)))
- return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+ if (Subtarget->hasBMI()) {
+ // Check LHS for neg
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
+ isZero(N0.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
+
+ // Check RHS for neg
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
+ isZero(N1.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+
+ // Check LHS for X-1
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+
+ // Check RHS for X-1
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+ }
+
+ if (Subtarget->hasBMI2()) {
+ // Check for (and (add (shl 1, Y), -1), X)
+ if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SHL) {
+ SDValue N001 = N00.getOperand(1);
+ assert(N001.getValueType() == MVT::i8 && "unexpected type");
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0));
+ if (C && C->getZExtValue() == 1)
+ return DAG.getNode(X86ISD::BZHI, DL, VT, N1, N001);
+ }
+ }
- // Check LHS for X-1
- if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
- isAllOnes(N0.getOperand(1)))
- return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+ // Check for (and X, (add (shl 1, Y), -1))
+ if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::SHL) {
+ SDValue N101 = N10.getOperand(1);
+ assert(N101.getValueType() == MVT::i8 && "unexpected type");
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0));
+ if (C && C->getZExtValue() == 1)
+ return DAG.getNode(X86ISD::BZHI, DL, VT, N0, N101);
+ }
+ }
+ }
- // Check RHS for X-1
- if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
- isAllOnes(N1.getOperand(1)))
- return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+ // Check for BEXTR.
+ if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
+ (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
+ ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (MaskNode && ShiftNode) {
+ uint64_t Mask = MaskNode->getZExtValue();
+ uint64_t Shift = ShiftNode->getZExtValue();
+ if (isMask_64(Mask)) {
+ uint64_t MaskSize = CountPopulation_64(Mask);
+ if (Shift + MaskSize <= VT.getSizeInBits())
+ return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Shift | (MaskSize << 8), VT));
+ }
+ }
+ } // BEXTR
return SDValue();
}
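For reference, the matches above correspond to simple scalar identities; a standalone sketch of the assumed BLSI/BLSR/BZHI/BEXTR behaviour, including the Shift | (MaskSize << 8) control packing built for BEXTR (helper names are illustrative, not from the patch):

#include <cstdint>

// BLSI: isolate the lowest set bit, X & (-X).
static uint64_t Blsi64(uint64_t X) { return X & (0 - X); }
// BLSR: clear the lowest set bit, X & (X - 1).
static uint64_t Blsr64(uint64_t X) { return X & (X - 1); }

// BZHI: keep only the low Y bits of X; Y >= 64 keeps everything.
static uint64_t Bzhi64(uint64_t X, unsigned Y) {
  return Y >= 64 ? X : (X & ((1ULL << Y) - 1));
}

// BEXTR: extract Len bits of X starting at bit Start (Start < 64). The combine
// above packs its control operand as Start | (Len << 8).
static uint64_t Bextr64(uint64_t X, unsigned Start, unsigned Len) {
  if (Len == 0)
    return 0;
  uint64_t Mask = Len >= 64 ? ~0ULL : ((1ULL << Len) - 1);
  return (X >> Start) & Mask;
}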
@@ -17732,7 +18504,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
- MVT VT = LHS.getValueType().getSimpleVT();
+ MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
@@ -18205,7 +18977,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
- !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ VT == MVT::i64) {
SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
@@ -18531,6 +19303,22 @@ namespace {
const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
}
+static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
+
+ if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
+ if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
+ std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
+ std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
+
+ if (AsmPieces.size() == 3)
+ return true;
+ else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
+ return true;
+ }
+ }
+ return false;
+}
+
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
@@ -18572,12 +19360,8 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
- if (AsmPieces.size() == 4 &&
- AsmPieces[0] == "~{cc}" &&
- AsmPieces[1] == "~{dirflag}" &&
- AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}")
- return IntrinsicLowering::LowerToByteSwap(CI);
+ if (clobbersFlagRegisters(AsmPieces))
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
@@ -18590,11 +19374,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
- if (AsmPieces.size() == 4 &&
- AsmPieces[0] == "~{cc}" &&
- AsmPieces[1] == "~{dirflag}" &&
- AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}")
+ if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 2703274..bc3dd60 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -53,7 +53,7 @@ namespace llvm {
/// to X86::XORPS or X86::XORPD.
FXOR,
- /// FAND - Bitwise logical ANDNOT of floating point values. This
+ /// FANDN - Bitwise logical ANDNOT of floating point values. This
/// corresponds to X86::ANDNPS or X86::ANDNPD.
FANDN,
@@ -254,6 +254,12 @@ namespace llvm {
// VSEXT - Vector integer signed-extend.
VSEXT,
+ // VTRUNC - Vector integer truncate.
+ VTRUNC,
+
+      // VTRUNCM - Vector integer truncate with mask.
+ VTRUNCM,
+
// VFPEXT - Vector FP extend.
VFPEXT,
@@ -274,6 +280,13 @@ namespace llvm {
// PCMP* - Vector integer comparisons.
PCMPEQ, PCMPGT,
+      // PCMP*M - Vector integer comparisons; the result is in a mask vector.
+ PCMPEQM, PCMPGTM,
+
+ /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
+ /// integer signed and unsigned data types.
+ CMPM,
+ CMPMU,
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
@@ -282,18 +295,23 @@ namespace llvm {
BLSI, // BLSI - Extract lowest set isolated bit
BLSMSK, // BLSMSK - Get mask up to lowest set bit
BLSR, // BLSR - Reset lowest set bit
+ BZHI, // BZHI - Zero high bits
+ BEXTR, // BEXTR - Bit field extract
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,
- // PTEST - Vector bitwise comparisons
+ // PTEST - Vector bitwise comparisons.
PTEST,
- // TESTP - Vector packed fp sign bitwise comparisons
+ // TESTP - Vector packed fp sign bitwise comparisons.
TESTP,
+      // TESTM - Vector "test" in AVX-512; the result is in a mask vector.
+ TESTM,
+
// OR/AND test for masks
KORTEST,
KTEST,
@@ -318,11 +336,13 @@ namespace llvm {
UNPCKH,
VPERMILP,
VPERMV,
+ VPERMV3,
VPERMI,
VPERM2X128,
VBROADCAST,
// masked broadcast
VBROADCASTM,
+ VINSERT,
// PMULUDQ - Vector multiply packed unsigned doubleword integers
PMULUDQ,
@@ -755,6 +775,8 @@ namespace llvm {
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
+ virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const LLVM_OVERRIDE;
+
/// \brief Reset the operation actions based on target options.
virtual void resetOperationActions();
@@ -831,8 +853,6 @@ namespace llvm {
bool isSigned,
bool isReplace) const;
- SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
- SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
@@ -846,18 +866,12 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
@@ -881,19 +895,7 @@ namespace llvm {
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
-
- // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
- SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
- SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
- SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -925,6 +927,8 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const;
+ virtual const uint16_t *getScratchRegisters(CallingConv::ID CC) const;
+
/// Utility function to emit atomic-load-arith operations (and, or, xor,
/// nand, max, min, umax, umin). It takes the corresponding instruction to
/// expand, the associated machine basic block, and the associated X86
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 8abae14..cb19fbd 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -80,6 +80,21 @@ let Predicates = [HasAVX512] in {
def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
}
+//
+// AVX-512: the VPXOR instruction writes zeros to its upper part, so it is safe to use it to build zeros.
+//
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, Predicates = [HasAVX512] in {
+def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
+ [(set VR512:$dst, (v16f32 immAllZerosV))]>;
+}
+
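+// Any 512-bit all-zeros constant, regardless of element type, is selected
+// through this single rematerializable pseudo.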
+def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
@@ -376,6 +391,11 @@ def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSDZrm addr:$src)>;
+def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
+ (VBROADCASTSSZrm addr:$src)>;
+def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
+ (VBROADCASTSDZrm addr:$src)>;
+
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
RegisterClass SrcRC, RegisterClass KRC> {
def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
@@ -402,6 +422,13 @@ def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
(VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
(VPBROADCASTQrZrr GR64:$src)>;
+def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
+ (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;
+
+def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
+ (VPBROADCASTDrZrr GR32:$src)>;
+def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
+ (VPBROADCASTQrZrr GR64:$src)>;
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, PatFrag ld_frag,
@@ -414,10 +441,11 @@ multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
VR128X:$src),
!strconcat(OpcodeStr,
- "\t{$src, ${dst}{${mask}}{z}|${dst}{${mask}}{z}, $src}"),
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set DstRC:$dst,
(OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
EVEX, EVEX_KZ;
+ let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
@@ -425,9 +453,10 @@ multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
x86memop:$src),
!strconcat(OpcodeStr,
- "\t{$src, ${dst}{${mask}}{z}|${dst}{${mask}}{z}, $src}"),
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
(ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
+ }
}
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
@@ -437,10 +466,20 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VT1>;
+def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
+ (VPBROADCASTDZrr VR128X:$src)>;
+def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
+ (VPBROADCASTQZrr VR128X:$src)>;
+
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
(VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
(VBROADCASTSDZrr VR128X:$src)>;
+
+def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
+ (VBROADCASTSSZrr VR128X:$src)>;
+def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
+ (VBROADCASTSDZrr VR128X:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
@@ -473,6 +512,306 @@ defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+//===----------------------------------------------------------------------===//
+// AVX-512 - VPERM
+//
+// -- immediate form --
+multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ SDNode OpNode, PatFrag mem_frag,
+ X86MemOperand x86memop, ValueType OpVT> {
+ def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
+ EVEX;
+ def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (OpNode (mem_frag addr:$src1),
+ (i8 imm:$src2))))]>, EVEX;
+}
+
+defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
+ i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
+ f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// -- VPERM - register form --
+multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
+
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
+
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
+ EVEX_4V;
+}
+
+defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
+ v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
+ v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+let ExeDomain = SSEPackedSingle in
+defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
+ v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
+ v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// -- VPERM2I - 3 source operands form --
+multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ PatFrag mem_frag, X86MemOperand x86memop,
+ ValueType OpVT> {
+let Constraints = "$src1 = $dst" in {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (X86VPermv3 RC:$src1, RC:$src2, RC:$src3)))]>,
+ EVEX_4V;
+
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (X86VPermv3 RC:$src1, RC:$src2,
+ (mem_frag addr:$src3))))]>, EVEX_4V;
+ }
+}
+defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
+ v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
+ v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem,
+ v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
+ v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - BLEND using mask
+//
+multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ RegisterClass KRC, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ SDNode OpNode, ValueType vt> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
+ [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
+ (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
+ def rr_Int : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
+ [(set RC:$dst, (Int KRC:$mask, (vt RC:$src2),
+ (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
+
+ let mayLoad = 1 in {
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $mask, $dst|$dst, $mask, $src1, $src2}"),
+ []>,
+ EVEX_4V, EVEX_K;
+
+ def rm_Int : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $mask, $dst|$dst, $mask, $src1, $src2}"),
+ [(set RC:$dst, (Int KRC:$mask, (vt RC:$src1),
+ (mem_frag addr:$src2)))]>,
+ EVEX_4V, EVEX_K;
+ }
+}
+
+let ExeDomain = SSEPackedSingle in
+defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
+ int_x86_avx512_mskblend_ps_512,
+ VK16WM, VR512, f512mem,
+ memopv16f32, vselect, v16f32>,
+ EVEX_CD8<32, CD8VF>, EVEX_V512;
+let ExeDomain = SSEPackedDouble in
+defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
+ int_x86_avx512_mskblend_pd_512,
+ VK8WM, VR512, f512mem,
+ memopv8f64, vselect, v8f64>,
+ VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
+
+defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
+ int_x86_avx512_mskblend_d_512,
+ VK16WM, VR512, f512mem,
+ memopv16i32, vselect, v16i32>,
+ EVEX_CD8<32, CD8VF>, EVEX_V512;
+
+defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
+ int_x86_avx512_mskblend_q_512,
+ VK8WM, VR512, f512mem,
+ memopv8i64, vselect, v8i64>,
+ VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
+
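+// A vselect with a v8i1 mask is lowered by widening: both ymm operands are
+// placed in zmm registers, blended with the 512-bit masked blend, and the low
+// ymm half of the result is extracted again.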
+let Predicates = [HasAVX512] in {
+def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
+ (v8f32 VR256X:$src2))),
+ (EXTRACT_SUBREG
+ (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
+ (v8i32 VR256X:$src2))),
+ (EXTRACT_SUBREG
+ (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+}
+
+multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+ SDNode OpNode, ValueType vt> {
+ def rr : AVX512BI<opc, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
+ IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rm : AVX512BI<opc, MRMSrcMem,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2)))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+}
+
+defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
+ memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512;
+defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
+ memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W;
+
+defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
+ memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512;
+defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
+ memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W;
+
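+// v8i32 compares that produce a v8i1 mask are widened to the 512-bit compare;
+// the resulting mask register is then reinterpreted as VK8.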
+def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (COPY_TO_REGCLASS (VPCMPGTDZrr
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
+
+def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (COPY_TO_REGCLASS (VPCMPEQDZrr
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
+
+multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+ SDNode OpNode, ValueType vt, Operand CC, string asm,
+ string asm_alt> {
+ def rri : AVX512AIi8<opc, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
+ IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rmi : AVX512AIi8<opc, MRMSrcMem,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
+ imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ // Accept explicit immediate argument form instead of comparison code.
+ let neverHasSideEffects = 1 in {
+ def rri_alt : AVX512AIi8<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ }
+}
+
+defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32,
+ X86cmpm, v16i32, AVXCC,
+ "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32,
+ X86cmpmu, v16i32, AVXCC,
+ "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
+ X86cmpm, v8i64, AVXCC,
+ "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
+ X86cmpmu, v8i64, AVXCC,
+ "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
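+// With the AVXCC operand the predicate is folded into the mnemonic (for
+// example "vpcmpled" rather than "vpcmpd" with an explicit $cc immediate);
+// the *_alt variants above accept the explicit-immediate spelling.
+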
+// avx512_cmp_packed - AVX-512 packed compare instructions, modeled on the SSE 1 & 2 packed compares
+multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
+ X86MemOperand x86memop, Operand CC,
+ SDNode OpNode, ValueType vt, string asm,
+ string asm_alt, Domain d> {
+ def rri : AVX512PIi8<0xC2, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
+ def rmi : AVX512PIi8<0xC2, MRMSrcMem,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set KRC:$dst,
+ (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+ let neverHasSideEffects = 1 in {
+ def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], d>;
+ def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], d>;
+ }
+}
+
+defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, AVXCC, X86cmpm, v16f32,
+ "vcmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vcmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, AVXCC, X86cmpm, v8f64,
+ "vcmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vcmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, OpSize, EVEX_4V, VEX_W, EVEX_V512,
+ EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (VCMPPSZrri
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (VPCMPDZrri
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (VPCMPUDZrri
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
@@ -713,3 +1052,2475 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Aligned and unaligned load and store
+//
+
+multiclass avx512_mov_packed<bits<8> opc, RegisterClass RC, RegisterClass KRC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+let neverHasSideEffects = 1 in
+ def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
+ EVEX;
+let canFoldAsLoad = 1 in
+ def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], d>, EVEX;
+let Constraints = "$src1 = $dst" in {
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(asm,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
+ EVEX, EVEX_K;
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86memop:$src2),
+ !strconcat(asm,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ [], d>, EVEX, EVEX_K;
+}
+}
+
+defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
+ "vmovaps", SSEPackedSingle>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
+ "vmovapd", SSEPackedDouble>,
+ OpSize, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32,
+ "vmovups", SSEPackedSingle>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64,
+ "vmovupd", SSEPackedDouble>,
+ OpSize, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+ "vmovaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore512 (v16f32 VR512:$src), addr:$dst)],
+ SSEPackedSingle>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+ "vmovapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore512 (v8f64 VR512:$src), addr:$dst)],
+ SSEPackedDouble>, EVEX, EVEX_V512,
+ OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+ "vmovups\t{$src, $dst|$dst, $src}",
+ [(store (v16f32 VR512:$src), addr:$dst)],
+ SSEPackedSingle>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+ "vmovupd\t{$src, $dst|$dst, $src}",
+ [(store (v8f64 VR512:$src), addr:$dst)],
+ SSEPackedDouble>, EVEX, EVEX_V512,
+ OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+let neverHasSideEffects = 1 in {
+ def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
+ (ins VR512:$src),
+ "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512;
+ def VMOVDQA64rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
+ (ins VR512:$src),
+ "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, VEX_W;
+let mayStore = 1 in {
+ def VMOVDQA32mr : AVX512BI<0x7F, MRMDestMem, (outs),
+ (ins i512mem:$dst, VR512:$src),
+ "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def VMOVDQA64mr : AVX512BI<0x7F, MRMDestMem, (outs),
+ (ins i512mem:$dst, VR512:$src),
+ "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+let mayLoad = 1 in {
+def VMOVDQA32rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src),
+ "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src),
+ "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+}
+
+// 512-bit aligned load/store
+def : Pat<(alignedloadv8i64 addr:$src), (VMOVDQA64rm addr:$src)>;
+def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>;
+
+def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
+ (VMOVDQA64mr addr:$dst, VR512:$src)>;
+def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
+ (VMOVDQA32mr addr:$dst, VR512:$src)>;
+
+multiclass avx512_mov_int<bits<8> load_opc, bits<8> store_opc, string asm,
+ RegisterClass RC, RegisterClass KRC,
+ PatFrag ld_frag, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in
+ def rr : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
+let canFoldAsLoad = 1 in
+ def rm : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))]>, EVEX;
+let mayStore = 1 in
+ def mr : AVX512XSI<store_opc, MRMDestMem, (outs),
+ (ins x86memop:$dst, VR512:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
+let Constraints = "$src1 = $dst" in {
+ def rrk : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(asm,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>,
+ EVEX, EVEX_K;
+ def rmk : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86memop:$src2),
+ !strconcat(asm,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
+}
+}
+
+defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
+ memopv16i32, i512mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM,
+ memopv8i64, i512mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// 512-bit unaligned load/store
+def : Pat<(loadv8i64 addr:$src), (VMOVDQU64rm addr:$src)>;
+def : Pat<(loadv16i32 addr:$src), (VMOVDQU32rm addr:$src)>;
+
+def : Pat<(store (v8i64 VR512:$src), addr:$dst),
+ (VMOVDQU64mr addr:$dst, VR512:$src)>;
+def : Pat<(store (v16i32 VR512:$src), addr:$dst),
+ (VMOVDQU32mr addr:$dst, VR512:$src)>;
+
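+// A full-register vselect under a mask is matched to the merging (masked)
+// register-to-register move, with the false operand supplying the pass-through
+// value.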
+let AddedComplexity = 20 in {
+def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
+ (v16f32 VR512:$src2))),
+ (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
+def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1),
+ (v8f64 VR512:$src2))),
+ (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
+def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1),
+ (v16i32 VR512:$src2))),
+ (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
+def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
+ (v8i64 VR512:$src2))),
+ (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
+}
+// Move Int Doubleword to Packed Double Int
+//
+def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ EVEX, VEX_LIG;
+def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
+let isCodeGenOnly = 1 in {
+def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
+def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
+}
+def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
+ EVEX_CD8<64, CD8VT1>;
+
+// Move Int Doubleword to Single Scalar
+//
+let isCodeGenOnly = 1 in {
+def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set FR32X:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
+
+def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
+
+// Move Packed Doubleword Int to Packed Double Int
+//
+def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ EVEX, VEX_LIG;
+def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128X:$src),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+ EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+
+// Move Packed Doubleword Int first element to Doubleword Int
+//
+def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
+ (iPTR 0)))],
+ IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W,
+ Requires<[HasAVX512, In64BitMode]>;
+
+def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128X:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
+ addr:$dst)], IIC_SSE_MOVDQ>,
+ EVEX, OpSize, VEX_LIG, VEX_W, TB, EVEX_CD8<64, CD8VT1>,
+ Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
+
+// Move Scalar Single to Double Int
+//
+let isCodeGenOnly = 1 in {
+def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst),
+ (ins FR32X:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32X:$src))],
+ IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
+def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, FR32X:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
+
+// Move Quadword Int to Packed Quadword Int
+//
+def VMOVQI2PQIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i64mem:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
+ EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 MOVSS, MOVSD
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_move_scalar <string asm, RegisterClass RC,
+ SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_pat> {
+ def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
+ def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
+ EVEX, VEX_LIG;
+ def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ EVEX, VEX_LIG;
+}
+
+let ExeDomain = SSEPackedSingle in
+defm VMOVSSZ : avx512_move_scalar<"movss{z}", FR32X, X86Movss, v4f32, f32mem,
+ loadf32>, XS, EVEX_CD8<32, CD8VT1>;
+
+let ExeDomain = SSEPackedDouble in
+defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem,
+ loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+
+// For the disassembler
+let isCodeGenOnly = 1 in {
+ def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, FR32X:$src2),
+ "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_MOV_S_RR>,
+ XS, EVEX_4V, VEX_LIG;
+ def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, FR64X:$src2),
+ "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_MOV_S_RR>,
+ XD, EVEX_4V, VEX_LIG, VEX_W;
+}
+
+let Predicates = [HasAVX512] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128X then do a
+ // MOVS{S,D} to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
+ (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
+ (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
+ (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
+ (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+
+ // Represent the same patterns above but in the form they appear for
+ // 256-bit types
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
+ }
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
+ FR32X:$src)), sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
+ FR64X:$src)), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDZrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
+
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
+
+ // Shuffle with VMOVSS
+ def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
+ (VMOVSSZrr (v4i32 VR128X:$src1),
+ (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
+ def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
+ (VMOVSSZrr (v4f32 VR128X:$src1),
+ (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
+
+ // 256-bit variants
+ def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
+ sub_xmm)>;
+ def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
+ sub_xmm)>;
+
+ // Shuffle with VMOVSD
+ def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+
+ // 256-bit variants
+ def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
+ sub_xmm)>;
+
+ def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+}
+
+let AddedComplexity = 15 in
+def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v2i64 (X86vzmovl
+ (v2i64 VR128X:$src))))],
+ IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
+
+let AddedComplexity = 20 in
+def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i128mem:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W,
+ EVEX_CD8<8, CD8VT8>;
+
+let Predicates = [HasAVX512] in {
+ // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
+ let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (VMOV64toPQIZrr GR64:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (VMOVDI2PDIZrr GR32:$src)>;
+
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZPQILo2PQIZrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
+ (VMOVZPQILo2PQIZrr VR128X:$src)>;
+ }
+
+ // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
+}
+
+def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
+
+def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Integer arithmetic
+//
+multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, PatFrag scalar_mfrag,
+ X86MemOperand x86scalar_mop, string BrdcstStr,
+ OpndItins itins, bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
+ itins.rr>, EVEX_4V;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
+ itins.rm>, EVEX_4V;
+ def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86scalar_mop:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
+ ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
+ [(set RC:$dst, (OpNode RC:$src1,
+ (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
+ itins.rm>, EVEX_4V, EVEX_B;
+}
+multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
+ ValueType DstVT, ValueType SrcVT, RegisterClass RC,
+ PatFrag memop_frag, X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, VEX_W;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, VEX_W;
+}
+
+defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>,
+ EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
+
+defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32,
+ VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8,
+ EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
+ VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512,
+ EVEX_CD8<64, CD8VF>;
+
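+// VPMULUDQ only reads the even (low) doubleword of each quadword lane, so the
+// v16i32 operands of the X86pmuludq node map directly onto the v8i64 result.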
+def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
+ (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
+
+defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : AVX512PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))],
+ d>, EVEX_4V;
+ def rm : AVX512PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (bitconvert (mem_frag addr:$src2)))))],
+ d>, EVEX_4V;
+}
+
+defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
+ VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
+ VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
+ VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
+ VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
+ IIC_SSE_UNPCK>, EVEX_4V;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
+ (bitconvert (memop_frag addr:$src2)))))],
+ IIC_SSE_UNPCK>, EVEX_4V;
+}
+defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
+ VR512, memopv16i32, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
+ VR512, memopv8i64, i512mem>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
+ VR512, memopv16i32, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
+ VR512, memopv8i64, i512mem>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+//===----------------------------------------------------------------------===//
+// AVX-512 - PSHUFD
+//
+
+multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ SDNode OpNode, PatFrag mem_frag,
+ X86MemOperand x86memop, ValueType OpVT> {
+ def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
+ EVEX;
+ def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (OpNode (mem_frag addr:$src1),
+ (i8 imm:$src2))))]>, EVEX;
+}
+
+defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
+ i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+let ExeDomain = SSEPackedSingle in
+defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
+ memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
+ memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512,
+ VEX_W, EVEX_CD8<32, CD8VF>;
+
+def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPSZri VR512:$src1, imm:$imm)>;
+def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPDZri VR512:$src1, imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Logical Instructions
+//===----------------------------------------------------------------------===//
+
+defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512,
+ memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
+ SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 FP arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins> {
+ defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss{z}"), OpNode, FR32X,
+ f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd{z}"), OpNode, FR64X,
+ f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
+ EVEX_CD8<64, CD8VT1>;
+}
+
+let isCommutable = 1 in {
+defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
+defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
+defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
+defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
+}
+let isCommutable = 0 in {
+defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
+defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
+}
+
+multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+ string BrdcstStr,
+ Domain d, OpndItins itins, bit commutable> {
+ let isCommutable = commutable in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ EVEX_4V, TB;
+ let mayLoad = 1 in {
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
+ itins.rm, d>, EVEX_4V, TB;
+ def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86scalar_mop:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
+ ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
+ [(set RC:$dst, (OpNode RC:$src1,
+ (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
+ itins.rm, d>, EVEX_4V, EVEX_B, TB;
+ }
+}
+
+defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 1>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 1>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 1>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 1>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 0>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 0>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 VPTESTM instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+ SDNode OpNode, ValueType vt> {
+ def rr : AVX5128I<opc, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V;
+ def rm : AVX5128I<opc, MRMSrcMem,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1),
+ (bitconvert (memop_frag addr:$src2))))]>, EVEX_4V;
+}
+
+defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
+ memopv16i32, X86testm, v16i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
+ memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Shift instructions
+//===----------------------------------------------------------------------===//
+multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode, RegisterClass RC,
+ ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
+ RegisterClass KRC> {
+ def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
+ SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
+ def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
+ def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode (mem_frag addr:$src1),
+ (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
+ def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
+ (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+}
+
+multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt, ValueType SrcVT,
+ PatFrag bc_frag, RegisterClass KRC> {
+ // src2 is always 128-bit
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
+ SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
+ def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (OpNode RC:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))],
+ SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
+ def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+}
+
+defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
+ VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
+ VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+ EVEX_CD8<32, CD8VQ>;
+
+defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
+ VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
+ VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VQ>, VEX_W;
+
+defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
+ VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
+ VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+ EVEX_CD8<32, CD8VQ>;
+
+defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
+ VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
+ VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VQ>, VEX_W;
+
+defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
+ VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
+ VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+ EVEX_CD8<32, CD8VQ>;
+
+defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
+ VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
+ VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VQ>, VEX_W;
+
+//===-------------------------------------------------------------------===//
+// Variable Bit Shifts
+//===-------------------------------------------------------------------===//
+multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
+ EVEX_4V;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
+ EVEX_4V;
+}
+
+defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
+ i512mem, memopv16i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
+ i512mem, memopv8i64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
+ i512mem, memopv16i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
+ i512mem, memopv8i64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
+ i512mem, memopv16i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
+ i512mem, memopv8i64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - MOVDDUP
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
+ X86MemOperand x86memop, PatFrag memop_frag> {
+def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
+def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst,
+ (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
+}
+
+defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPZrm addr:$src)>;
+
+//===---------------------------------------------------------------------===//
+// Replicate Single FP - MOVSHDUP and MOVSLDUP
+//===---------------------------------------------------------------------===//
+multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
+ ValueType vt, RegisterClass RC, PatFrag mem_frag,
+ X86MemOperand x86memop> {
+ def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
+ let mayLoad = 1 in
+ def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
+}
+
+defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+
+def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
+def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
+ (VMOVSHDUPZrm addr:$src)>;
+def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
+def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
+ (VMOVSLDUPZrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Move Low to High and High to Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
+ IIC_SSE_MOV_LH>, EVEX_4V;
+def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
+ IIC_SSE_MOV_LH>, EVEX_4V;
+
+let Predicates = [HasAVX512] in {
+ // MOVLHPS patterns
+ def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
+ (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
+ (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
+
+ // MOVHLPS patterns
+ def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
+ (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// FMA - Fused Multiply Operations
+//
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+ string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+ def r: AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst, (OpVT(OpNode RC:$src1, RC:$src2, RC:$src3)))]>;
+
+ let mayLoad = 1 in
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3))))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
+ !strconcat(OpcodeStr, "\t{${src3}", BrdcstStr,
+ ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
+ (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
+}
+} // Constraints = "$src1 = $dst"
+
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmadd, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmsub, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmaddsub, v16f32>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmsubadd, v16f32>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fnmadd, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fnmsub, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmadd, v8f64>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+ string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+ let mayLoad = 1 in
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src3, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
+ ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
+ [(set RC:$dst, (OpNode RC:$src1,
+ (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
+}
+} // Constraints = "$src1 = $dst"
+
+
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmadd, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmsub, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmaddsub, v16f32>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmsubadd, v16f32>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fnmadd, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fnmsub, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmadd, v8f64>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+}
+
+// Scalar FMA
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType OpVT,
+ X86MemOperand x86memop, Operand memop,
+ PatFrag mem_frag> {
+ let isCommutable = 1 in
+ def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+ let mayLoad = 1 in
+ def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1,
+ (mem_frag addr:$src3))))]>;
+}
+
+} // Constraints = "$src1 = $dst"
+
+defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X,
+ f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X,
+ f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X,
+ f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X,
+ f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X,
+ f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X,
+ f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X,
+ f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X,
+ f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Scalar convert from signed integer to float/double
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+let neverHasSideEffects = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ EVEX_4V;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ EVEX_4V;
+} // neverHasSideEffects = 1
+}
+let Predicates = [HasAVX512] in {
+defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">,
+ XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
+ XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">,
+ XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
+ XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+
+def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+def : Pat<(f32 (sint_to_fp GR32:$src)),
+ (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f32 (sint_to_fp GR64:$src)),
+ (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+def : Pat<(f64 (sint_to_fp GR32:$src)),
+ (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f64 (sint_to_fp GR64:$src)),
+ (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+
+defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}{z}">,
+ XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}{z}">,
+ XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}{z}">,
+ XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}{z}">,
+ XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+
+def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
+ (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
+ (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
+ (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
+ (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+def : Pat<(f32 (uint_to_fp GR32:$src)),
+ (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f32 (uint_to_fp GR64:$src)),
+ (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+def : Pat<(f64 (uint_to_fp GR32:$src)),
+ (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f64 (uint_to_fp GR64:$src)),
+ (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Scalar convert from float/double to integer
+//===----------------------------------------------------------------------===//
+multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
+ string asm> {
+let neverHasSideEffects = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
+} // neverHasSideEffects = 1
+}
+let Predicates = [HasAVX512] in {
+// Convert float/double to signed/unsigned int 32/64
+defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si{z}">,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si{z}">,
+ XS, VEX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
+ ssmem, sse_load_f32, "cvtss2usi{z}">,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+ int_x86_avx512_cvtss2usi64, ssmem,
+ sse_load_f32, "cvtss2usi{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
+ sdmem, sse_load_f64, "cvtsd2si{z}">,
+ XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
+ sdmem, sse_load_f64, "cvtsd2si{z}">,
+ XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
+ sdmem, sse_load_f64, "cvtsd2usi{z}">,
+ XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+ int_x86_avx512_cvtsd2usi64, sdmem,
+ sse_load_f64, "cvtsd2usi{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+
+defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}{z}",
+ SSE_CVT_Scalar, 0>, XS, EVEX_4V;
+defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}{z}",
+ SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
+defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}{z}",
+ SSE_CVT_Scalar, 0>, XD, EVEX_4V;
+defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}{z}",
+ SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
+
+defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
+ int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}{z}",
+ SSE_CVT_Scalar, 0>, XS, EVEX_4V;
+defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
+ int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}{z}",
+ SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
+defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
+ int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}{z}",
+ SSE_CVT_Scalar, 0>, XD, EVEX_4V;
+defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
+ int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}{z}",
+ SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
+
+// Convert float/double to signed/unsigned int 32/64 with truncation
+defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
+ ssmem, sse_load_f32, "cvttss2si{z}">,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
+ sdmem, sse_load_f64, "cvttsd2si{z}">, XD,
+ EVEX_CD8<64, CD8VT1>;
+defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
+                                       int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
+                                       "cvttss2usi{z}">, XS, EVEX_CD8<32, CD8VT1>;
+defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
+ int_x86_avx512_cvttss2usi64, ssmem,
+ sse_load_f32, "cvttss2usi{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
+ int_x86_avx512_cvttsd2usi,
+ sdmem, sse_load_f64, "cvttsd2usi{z}">, XD,
+ EVEX_CD8<64, CD8VT1>;
+defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
+ int_x86_avx512_cvttsd2usi64, sdmem,
+ sse_load_f64, "cvttsd2usi{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+}
+
+multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
+}
+
+defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
+ loadf32, "cvttss2si{z}">, XS,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
+ loadf32, "cvttss2usi{z}">, XS,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
+ loadf32, "cvttss2si{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
+ loadf32, "cvttss2usi{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
+ loadf64, "cvttsd2si{z}">, XD,
+ EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
+ loadf64, "cvttsd2usi{z}">, XD,
+ EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
+ loadf64, "cvttsd2si{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
+ loadf64, "cvttsd2usi{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+//===----------------------------------------------------------------------===//
+// AVX-512 Convert from float to double and back
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1 in {
+def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
+ (ins FR32X:$src1, FR32X:$src2),
+ "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
+def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
+ (ins FR32X:$src1, f32mem:$src2),
+ "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
+ EVEX_CD8<32, CD8VT1>;
+
+// Convert scalar double to scalar single
+def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
+ (ins FR64X:$src1, FR64X:$src2),
+ "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
+def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
+ (ins FR64X:$src1, f64mem:$src2),
+ "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX_4V, VEX_LIG, VEX_W,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
+}
+
+def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
+ Requires<[HasAVX512]>;
+def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX512, OptForSize]>;
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
+ Requires<[HasAVX512, OptForSpeed]>;
+
+def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
+ Requires<[HasAVX512]>;
+
+multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
+ RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
+ X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
+ Domain d> {
+let neverHasSideEffects = 1 in {
+ def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
+ let mayLoad = 1 in
+ def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
+} // neverHasSideEffects = 1
+}
+
+defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
+ memopv8f64, f512mem, v8f32, v8f64,
+ SSEPackedSingle>, EVEX_V512, VEX_W, OpSize,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
+ memopv4f64, f256mem, v8f64, v8f32,
+ SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+def : Pat<(v8f64 (extloadv8f32 addr:$src)),
+ (VCVTPS2PDZrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Vector convert from signed integer to float/double
+//===----------------------------------------------------------------------===//
+
+defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
+ memopv8i64, i512mem, v16f32, v16i32,
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
+ memopv4i64, i256mem, v8f64, v8i32,
+ SSEPackedDouble>, EVEX_V512, XS,
+ EVEX_CD8<32, CD8VH>;
+
+defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
+ memopv16f32, f512mem, v16i32, v16f32,
+ SSEPackedSingle>, EVEX_V512, XS,
+ EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
+ memopv8f64, f512mem, v8i32, v8f64,
+ SSEPackedDouble>, EVEX_V512, OpSize, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
+ memopv16f32, f512mem, v16i32, v16f32,
+ SSEPackedSingle>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
+ memopv8f64, f512mem, v8i32, v8f64,
+ SSEPackedDouble>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
+ memopv4i64, f256mem, v8f64, v8i32,
+ SSEPackedDouble>, EVEX_V512, XS,
+ EVEX_CD8<32, CD8VH>;
+
+defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
+ memopv16i32, f512mem, v16f32, v16i32,
+ SSEPackedSingle>, EVEX_V512, XD,
+ EVEX_CD8<32, CD8VF>;
+
+def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
+ (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+
+def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src),
+ (VCVTDQ2PSZrr VR512:$src)>;
+def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))),
+ (VCVTDQ2PSZrm addr:$src)>;
+
+def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ "vcvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst,
+ (int_x86_avx512_cvt_ps2dq_512 VR512:$src))],
+ IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512;
+def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ "vcvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst,
+ (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
+ (VCVTPD2PSZrm addr:$src)>;
+ def : Pat<(v8f64 (extloadv8f32 addr:$src)),
+ (VCVTPS2PDZrm addr:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Half precision conversion instructions
+//===----------------------------------------------------------------------===//
+multiclass avx512_f16c_ph2ps<RegisterClass destRC, RegisterClass srcRC,
+ X86MemOperand x86memop, Intrinsic Int> {
+ def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}",
+ [(set destRC:$dst, (Int srcRC:$src))]>, EVEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
+}
+
+multiclass avx512_f16c_ps2ph<RegisterClass destRC, RegisterClass srcRC,
+ X86MemOperand x86memop, Intrinsic Int> {
+ def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
+ (ins srcRC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set destRC:$dst, (Int srcRC:$src1, imm:$src2))]>, EVEX;
+ let neverHasSideEffects = 1, mayStore = 1 in
+ def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+}
+
+defm VCVTPH2PSZ : avx512_f16c_ph2ps<VR512, VR256X, f256mem,
+ int_x86_avx512_vcvtph2ps_512>, EVEX_V512,
+ EVEX_CD8<32, CD8VH>;
+defm VCVTPS2PHZ : avx512_f16c_ps2ph<VR256X, VR512, f256mem,
+ int_x86_avx512_vcvtps2ph_512>, EVEX_V512,
+ EVEX_CD8<32, CD8VH>;
+
+let Defs = [EFLAGS], Predicates = [HasAVX512] in {
+ defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
+ "ucomiss{z}">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
+ "ucomisd{z}">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ let Pattern = []<dag> in {
+ defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
+ "comiss{z}">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
+ "comisd{z}">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+ defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
+ load, "ucomiss">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
+ load, "ucomisd">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+ defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
+ load, "comiss">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
+ load, "comisd">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+}
+
+/// avx512_fp_unop_p - AVX-512 unops in packed form.
+multiclass avx512_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))]>,
+ EVEX, EVEX_V512;
+ def PSZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))]>,
+ EVEX, EVEX_V512, VEX_W;
+ def PDZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms.
+multiclass avx512_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V16F32Int, Intrinsic V8F64Int> {
+ def PSZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V16F32Int VR512:$src))]>,
+ EVEX, EVEX_V512;
+ def PSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int VR512:$src))]>,
+ EVEX, EVEX_V512, VEX_W;
+ def PDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V8F64Int (memopv8f64 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+/// avx512_fp_unop_s - AVX-512 unops in scalar form.
+multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr> {
+ let hasSideEffects = 0 in {
+ def SSZr : AVX5128I<opc, MRMSrcReg, (outs FR32X:$dst),
+ (ins FR32X:$src1, FR32X:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V;
+ let mayLoad = 1 in {
+ def SSZm : AVX5128I<opc, MRMSrcMem, (outs FR32X:$dst),
+ (ins FR32X:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ def SSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ }
+ def SDZr : AVX5128I<opc, MRMSrcReg, (outs FR64X:$dst),
+ (ins FR64X:$src1, FR64X:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ EVEX_4V, VEX_W;
+ let mayLoad = 1 in {
+ def SDZm : AVX5128I<opc, MRMSrcMem, (outs FR64X:$dst),
+ (ins FR64X:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def SDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+}
+}
+
+defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14">,
+ avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>,
+ avx512_fp_unop_p_int<0x4C, "vrcp14",
+ int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>;
+
+defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14">,
+ avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>,
+ avx512_fp_unop_p_int<0x4E, "vrsqrt14",
+ int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>;
+
+def : Pat<(int_x86_avx512_rsqrt14_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VRSQRT14SSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+def : Pat<(int_x86_avx512_rsqrt14_ss sse_load_f32:$src),
+ (VRSQRT14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+def : Pat<(int_x86_avx512_rcp14_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VRCP14SSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+def : Pat<(int_x86_avx512_rcp14_ss sse_load_f32:$src),
+ (VRCP14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+let AddedComplexity = 20, Predicates = [HasERI] in {
+defm VRCP28 : avx512_fp_unop_s<0xCB, "vrcp28">,
+ avx512_fp_unop_p<0xCA, "vrcp28", X86frcp>,
+ avx512_fp_unop_p_int<0xCA, "vrcp28",
+ int_x86_avx512_rcp28_ps_512, int_x86_avx512_rcp28_pd_512>;
+
+defm VRSQRT28 : avx512_fp_unop_s<0xCD, "vrsqrt28">,
+ avx512_fp_unop_p<0xCC, "vrsqrt28", X86frsqrt>,
+ avx512_fp_unop_p_int<0xCC, "vrsqrt28",
+ int_x86_avx512_rsqrt28_ps_512, int_x86_avx512_rsqrt28_pd_512>;
+}
+
+let Predicates = [HasERI] in {
+ def : Pat<(int_x86_avx512_rsqrt28_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VRSQRT28SSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+ def : Pat<(int_x86_avx512_rsqrt28_ss sse_load_f32:$src),
+ (VRSQRT28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_avx512_rcp28_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VRCP28SSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+ def : Pat<(int_x86_avx512_rcp28_ss sse_load_f32:$src),
+ (VRCP28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+}
+multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ Intrinsic V16F32Int, Intrinsic V8F64Int,
+ OpndItins itins_s, OpndItins itins_d> {
+ def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
+ EVEX, EVEX_V512;
+
+ let mayLoad = 1 in
+ def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
+ itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+ def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
+ EVEX, EVEX_V512;
+
+ let mayLoad = 1 in
+ def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (OpNode
+ (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
+ itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+ def PSZr_Int : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V16F32Int VR512:$src))]>,
+ EVEX, EVEX_V512;
+ def PSZm_Int : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr_Int : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int VR512:$src))]>,
+ EVEX, EVEX_V512, VEX_W;
+ def PDZm_Int : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
+ Intrinsic F32Int, Intrinsic F64Int,
+ OpndItins itins_s, OpndItins itins_d> {
+ def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
+ (ins FR32X:$src1, FR32X:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins_s.rr>, XS, EVEX_4V;
+ def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F32Int VR128X:$src1, VR128X:$src2))],
+ itins_s.rr>, XS, EVEX_4V;
+ let mayLoad = 1 in {
+ def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
+ (ins FR32X:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F32Int VR128X:$src1, sse_load_f32:$src2))],
+ itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ }
+ def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
+ (ins FR64X:$src1, FR64X:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ XD, EVEX_4V, VEX_W;
+ def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, VR128X:$src2))],
+ itins_s.rr>, XD, EVEX_4V, VEX_W;
+ let mayLoad = 1 in {
+ def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
+ (ins FR64X:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
+ XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+}
+
+
+defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
+ int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
+ SSE_SQRTSS, SSE_SQRTSD>,
+ avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
+ int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512,
+ SSE_SQRTPS, SSE_SQRTPD>;
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(f32 (fsqrt FR32X:$src)),
+ (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+ def : Pat<(f64 (fsqrt FR64X:$src)),
+ (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
+ def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+ def : Pat<(f32 (X86frsqrt FR32X:$src)),
+ (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+ def : Pat<(f32 (X86frcp FR32X:$src)),
+ (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+ def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+ def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
+ (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
+ (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR64)),
+ VR128X)>;
+ def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
+ (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
+}
+
+
+multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int,
+ CD8VForm VForm> {
+let ExeDomain = SSEPackedSingle in {
+  // Vector intrinsic operation, reg
+ def PSr : AVX512AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
+
+ // Vector intrinsic operation, mem
+ def PSm : AVX512AIi8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ EVEX_CD8<32, VForm>;
+} // ExeDomain = SSEPackedSingle
+
+let ExeDomain = SSEPackedDouble in {
+ // Vector intrinsic operation, reg
+ def PDr : AVX512AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
+
+ // Vector intrinsic operation, mem
+ def PDm : AVX512AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ EVEX_CD8<64, VForm>;
+} // ExeDomain = SSEPackedDouble
+}
+
+multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int> {
+let ExeDomain = GenericDomain in {
+ // Operation, reg.
+ let hasSideEffects = 0 in
+ def SSr : AVX512AIi8<opcss, MRMSrcReg,
+ (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
+
+ // Intrinsic operation, mem.
+ def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F32Int VR128X:$src1,
+ sse_load_f32:$src2, imm:$src3))]>,
+ EVEX_CD8<32, CD8VT1>;
+
+ // Operation, reg.
+ let hasSideEffects = 0 in
+ def SDr : AVX512AIi8<opcsd, MRMSrcReg,
+ (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_W;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
+ VEX_W;
+
+ // Intrinsic operation, mem.
+ def SDm : AVX512AIi8<opcsd, MRMSrcMem,
+ (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ VEX_W, EVEX_CD8<64, CD8VT1>;
+} // ExeDomain = GenericDomain
+}
+
+let Predicates = [HasAVX512] in {
+ defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale",
+ int_x86_avx512_rndscale_ss,
+ int_x86_avx512_rndscale_sd>, EVEX_4V;
+
+ defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f256mem, VR512,
+ memopv16f32, memopv8f64,
+ int_x86_avx512_rndscale_ps_512,
+ int_x86_avx512_rndscale_pd_512, CD8VF>,
+ EVEX, EVEX_V512;
+}
+
+def : Pat<(ffloor FR32X:$src),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
+def : Pat<(f64 (ffloor FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
+def : Pat<(f32 (fnearbyint FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
+def : Pat<(f64 (fnearbyint FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
+def : Pat<(f32 (fceil FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
+def : Pat<(f64 (fceil FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
+def : Pat<(f32 (frint FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
+def : Pat<(f64 (frint FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
+def : Pat<(f32 (ftrunc FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
+def : Pat<(f64 (ftrunc FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
+
+def : Pat<(v16f32 (ffloor VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x1))>;
+def : Pat<(v16f32 (fnearbyint VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0xC))>;
+def : Pat<(v16f32 (fceil VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x2))>;
+def : Pat<(v16f32 (frint VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x4))>;
+def : Pat<(v16f32 (ftrunc VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x3))>;
+
+def : Pat<(v8f64 (ffloor VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x1))>;
+def : Pat<(v8f64 (fnearbyint VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0xC))>;
+def : Pat<(v8f64 (fceil VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x2))>;
+def : Pat<(v8f64 (frint VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x4))>;
+def : Pat<(v8f64 (ftrunc VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x3))>;
+
+//===----------------------------------------------------------------------===//
+// Integer truncate and extend operations
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
+ RegisterClass dstRC, RegisterClass srcRC,
+ RegisterClass KRC, X86MemOperand x86memop> {
+ def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
+ (ins srcRC:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+ []>, EVEX;
+
+ def krr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
+ (ins KRC:$mask, srcRC:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ []>, EVEX, EVEX_KZ;
+
+ def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, EVEX;
+}
+defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
+ i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
+ i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
+ i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
+ i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
+ i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
+ i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+
+def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
+def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
+def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
+def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
+def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
+
+def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
+ (VPMOVDBkrr VK16WM:$mask, VR512:$src)>;
+def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
+ (VPMOVDWkrr VK16WM:$mask, VR512:$src)>;
+def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
+ (VPMOVQWkrr VK8WM:$mask, VR512:$src)>;
+def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
+ (VPMOVQDkrr VK8WM:$mask, VR512:$src)>;
+
+
+multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass DstRC,
+ RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag,
+ X86MemOperand x86memop, ValueType OpVT, ValueType InVT> {
+
+ def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
+ (ins SrcRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins x86memop:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
+ EVEX;
+}
+
+defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext,
+ memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
+ EVEX_CD8<8, CD8VQ>;
+defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext,
+ memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
+ EVEX_CD8<8, CD8VO>;
+defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext,
+ memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
+ EVEX_CD8<16, CD8VH>;
+defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext,
+ memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
+ EVEX_CD8<16, CD8VQ>;
+defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext,
+ memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VH>;
+
+defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext,
+ memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
+ EVEX_CD8<8, CD8VQ>;
+defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext,
+ memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
+ EVEX_CD8<8, CD8VO>;
+defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext,
+ memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
+ EVEX_CD8<16, CD8VH>;
+defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext,
+ memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
+ EVEX_CD8<16, CD8VQ>;
+defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext,
+ memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VH>;
+
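For readers new to TableGen, each defm above stamps the multiclass body into concrete records, with the trailing EVEX_V512 / EVEX_CD8 flags appended. A rough sketch of what the first zero-extend instantiation produces (illustrative only, not part of the patch):

// Sketch: approximate expansion of "defm VPMOVZXBDZ" through avx512_extend.
def VPMOVZXBDZrr : AVX5128I<0x31, MRMSrcReg, (outs VR512:$dst), (ins VR128X:$src),
                   "vpmovzxbd\t{$src, $dst|$dst, $src}",
                   [(set VR512:$dst, (v16i32 (X86vzext (v16i8 VR128X:$src))))]>,
                   EVEX, EVEX_V512, EVEX_CD8<8, CD8VQ>;
// The rm form is generated the same way, with memopv2i64 / i128mem feeding the
// bitconverted load in place of the register source.
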
+//===----------------------------------------------------------------------===//
+// GATHER - SCATTER Operations
+
+multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand memop> {
+let mayLoad = 1,
+ Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
+ (ins RC:$src1, KRC:$mask, memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
+}
+defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
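The gather records inherit their side conditions from the let block in avx512_gather: the destination is tied to $src1 and the mask operand is both consumed and written back as $mask_wb. Roughly what one instantiation expands to, shown here as a sketch for orientation only:

// Sketch: approximate expansion of "defm VGATHERDPSZ : avx512_gather<0x92, ...>".
let mayLoad = 1,
    Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
def VGATHERDPSZrm : AVX5128I<0x92, MRMSrcMem,
                    (outs VR512:$dst, VK16WM:$mask_wb),
                    (ins VR512:$src1, VK16WM:$mask, vz32mem:$src2),
                    "vgatherdps\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                    []>, EVEX, EVEX_K, EVEX_V512, EVEX_CD8<32, CD8VT1>;
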
+multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand memop> {
+let mayStore = 1, Constraints = "$mask = $mask_wb" in
+ def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
+ (ins memop:$dst, KRC:$mask, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
+}
+
+defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+//===----------------------------------------------------------------------===//
+// VSHUFPS - VSHUFPD Operations
+
+multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string OpcodeStr, PatFrag mem_frag,
+ Domain d> {
+ def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
+ (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
+ EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
+ (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
+ EVEX_4V, Sched<[WriteShuffle]>;
+}
+
+defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
+ SSEPackedDouble>, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
+def : Pat<(v16i32 (X86Shufp VR512:$src1,
+ (memopv16i32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
+def : Pat<(v8i64 (X86Shufp VR512:$src1,
+ (memopv8i64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
+
+multiclass avx512_alignr<string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop> {
+ def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, EVEX_4V;
+ let mayLoad = 1 in
+ def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, EVEX_4V;
+}
+defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
+def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
+def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
+def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
+
+multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ EVEX;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
+ (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ EVEX;
+}
+
+defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, RegisterClass KRC, PatFrag memop_frag,
+ X86MemOperand x86memop, PatFrag scalar_mfrag,
+ X86MemOperand x86scalar_mop, string BrdcstStr,
+ Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src),
+                     !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
+ [(set RC:$dst, (Int RC:$src))]>, EVEX;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
+ [(set RC:$dst, (Int (memop_frag addr:$src)))]>, EVEX;
+ def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86scalar_mop:$src),
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
+ ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
+ []>, EVEX, EVEX_B;
+ def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ [(set RC:$dst, (maskzInt KRC:$mask, RC:$src))]>, EVEX, EVEX_KZ;
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, x86memop:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ [(set RC:$dst, (maskzInt KRC:$mask, (memop_frag addr:$src)))]>,
+ EVEX, EVEX_KZ;
+ def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, x86scalar_mop:$src),
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
+ ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
+ BrdcstStr, "}"),
+ []>, EVEX, EVEX_KZ, EVEX_B;
+
+ let Constraints = "$src1 = $dst" in {
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, RC:$src2))]>, EVEX, EVEX_K;
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, (memop_frag addr:$src2)))]>, EVEX, EVEX_K;
+ def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
+ ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
+ []>, EVEX, EVEX_K, EVEX_B;
+ }
+}
+
+let Predicates = [HasCDI] in {
+defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
+ memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
+ int_x86_avx512_conflict_d_512,
+ int_x86_avx512_conflict_d_mask_512,
+ int_x86_avx512_conflict_d_maskz_512>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
+ memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
+ int_x86_avx512_conflict_q_512,
+ int_x86_avx512_conflict_q_mask_512,
+ int_x86_avx512_conflict_q_maskz_512>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 9ce02ba..7fc9c44 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -497,6 +497,21 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
+let isCodeGenOnly = 1, CodeSize = 2 in {
+def INC32_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst", [], IIC_UNARY_REG>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst", [], IIC_UNARY_REG>,
+ Requires<[In32BitMode]>;
+def DEC32_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst", [], IIC_UNARY_REG>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst", [], IIC_UNARY_REG>,
+ Requires<[In32BitMode]>;
+} // isCodeGenOnly = 1, CodeSize = 2
+
} // Constraints = "$src1 = $dst", SchedRW
let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
@@ -578,7 +593,6 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
-
/// X86TypeInfo - This is a bunch of information that describes relevant X86
/// information about value types. For example, it can tell you what
/// register class and preferred load to use.
@@ -726,20 +740,25 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
- EFLAGS))], IIC_BIN_NONMEM>;
+ EFLAGS))], IIC_BIN_CARRY_NONMEM>;
// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
-class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], itin>,
Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
}
+// BinOpRR_RFF_Rev - Instructions like "adc reg, reg, reg" (reversed encoding).
+class BinOpRR_RFF_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : BinOpRR_Rev<opcode, mnemonic, typeinfo, IIC_BIN_CARRY_NONMEM>;
+
// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
@@ -753,10 +772,11 @@ class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
// BinOpRM - Instructions like "add reg, reg, [mem]".
class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, list<dag> pattern>
+ dag outlist, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_MEM>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
Sched<[WriteALULd, ReadAfterLd]>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
@@ -786,14 +806,15 @@ class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_CARRY_MEM>;
// BinOpRI - Instructions like "add reg, reg, imm".
class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Format f, dag outlist, list<dag> pattern>
+ Format f, dag outlist, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -824,14 +845,15 @@ class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_CARRY_NONMEM>;
// BinOpRI8 - Instructions like "add reg, reg, imm8".
class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Format f, dag outlist, list<dag> pattern>
+ Format f, dag outlist, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
Sched<[WriteALU]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -863,14 +885,14 @@ class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_CARRY_NONMEM>;
// BinOpMR - Instructions like "add [mem], reg".
class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- list<dag> pattern>
+ list<dag> pattern, InstrItinClass itin = IIC_BIN_MEM>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
Sched<[WriteALULd, WriteRMW]>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
@@ -886,7 +908,7 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_BIN_CARRY_MEM>;
// BinOpMR_F - Instructions like "cmp [mem], reg".
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -896,10 +918,11 @@ class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// BinOpMI - Instructions like "add [mem], imm".
class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
- Format f, list<dag> pattern, bits<8> opcode = 0x80>
+ Format f, list<dag> pattern, bits<8> opcode = 0x80,
+ InstrItinClass itin = IIC_BIN_MEM>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
Sched<[WriteALULd, WriteRMW]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -917,7 +940,7 @@ class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
: BinOpMI<mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], 0x80, IIC_BIN_CARRY_MEM>;
// BinOpMI_F - Instructions like "cmp [mem], imm".
class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
@@ -929,10 +952,11 @@ class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI8 - Instructions like "add [mem], imm8".
class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
- Format f, list<dag> pattern>
+ Format f, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_MEM>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
Sched<[WriteALULd, WriteRMW]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -951,7 +975,7 @@ class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
: BinOpMI8<mnemonic, typeinfo, f,
[(store (opnode (load addr:$dst),
typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_BIN_CARRY_MEM>;
// BinOpMI8_F - Instructions like "cmp [mem], imm8".
class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
@@ -962,10 +986,11 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
// BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def EFLAGS.
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg, string operands>
+ Register areg, string operands,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []>, Sched<[WriteALU]> {
+ mnemonic, operands, [], itin>, Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg, EFLAGS];
@@ -976,7 +1001,8 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// and use EFLAGS.
class BinOpAI_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
- : BinOpAI<opcode, mnemonic, typeinfo, areg, operands> {
+ : BinOpAI<opcode, mnemonic, typeinfo, areg, operands,
+ IIC_BIN_CARRY_NONMEM> {
let Uses = [areg, EFLAGS];
}
@@ -1070,10 +1096,10 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>;
def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 8969946..7d10b67 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -884,6 +884,24 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
[(set VR256:$dst,
(v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
EFLAGS)))]>;
+ def CMOV_V8I64 : I<0, Pseudo,
+ (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
+ "#CMOV_V8I64 PSEUDO!",
+ [(set VR512:$dst,
+ (v8i64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V8F64 : I<0, Pseudo,
+ (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
+ "#CMOV_V8F64 PSEUDO!",
+ [(set VR512:$dst,
+ (v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V16F32 : I<0, Pseudo,
+ (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
+ "#CMOV_V16F32 PSEUDO!",
+ [(set VR512:$dst,
+ (v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond,
+ EFLAGS)))]>;
}
@@ -917,8 +935,6 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV32mi addr:$dst, tblockaddress:$src)>;
-
-
// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
// code model mode, should use 'movabs'. FIXME: This is really a hack, the
// 'movabs' predicate should handle this sort of thing.
@@ -966,14 +982,12 @@ def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tblockaddress:$src)>,
Requires<[NearData, IsStatic]>;
-
-
// Calls
// tls has some funny stuff here...
// This corresponds to movabs $foo@tpoff, %rax
def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
- (MOV64ri tglobaltlsaddr :$dst)>;
+ (MOV64ri32 tglobaltlsaddr :$dst)>;
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 0e69651..e4ccc06 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -49,10 +49,12 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
+ let hasSideEffects = 0 in
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
"jmp\t$dst", [], IIC_JMP_REL>;
// FIXME : Intel syntax for JMP64pcrel32 such that it is not ambiguous
// with JMP_1.
+ let hasSideEffects = 0 in
def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmpq\t$dst", [], IIC_JMP_REL>;
}
@@ -60,6 +62,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ let hasSideEffects = 0 in
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
IIC_Jcc>;
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
@@ -85,7 +88,7 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
-let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the address size prefix is jcxz and the unprefixed version is
// jecxz.
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 28954c6..4090550 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -14,26 +14,26 @@
let neverHasSideEffects = 1 in {
let Defs = [AX], Uses = [AL] in
def CBW : I<0x98, RawFrm, (outs), (ins),
- "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ "{cbtw|cbw}", [], IIC_CBW>, OpSize; // AX = signext(AL)
let Defs = [EAX], Uses = [AX] in
def CWDE : I<0x98, RawFrm, (outs), (ins),
- "{cwtl|cwde}", []>; // EAX = signext(AX)
+ "{cwtl|cwde}", [], IIC_CBW>; // EAX = signext(AX)
let Defs = [AX,DX], Uses = [AX] in
def CWD : I<0x99, RawFrm, (outs), (ins),
- "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ "{cwtd|cwd}", [], IIC_CBW>, OpSize; // DX:AX = signext(AX)
let Defs = [EAX,EDX], Uses = [EAX] in
def CDQ : I<0x99, RawFrm, (outs), (ins),
- "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+ "{cltd|cdq}", [], IIC_CBW>; // EDX:EAX = signext(EAX)
let Defs = [RAX], Uses = [EAX] in
def CDQE : RI<0x98, RawFrm, (outs), (ins),
- "{cltq|cdqe}", []>; // RAX = signext(EAX)
+ "{cltq|cdqe}", [], IIC_CBW>; // RAX = signext(EAX)
let Defs = [RAX,RDX], Uses = [RAX] in
def CQO : RI<0x99, RawFrm, (outs), (ins),
- "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+ "{cqto|cqo}", [], IIC_CBW>; // RDX:RAX = signext(RAX)
}
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 7759a8a..69cd5a5 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -74,43 +74,43 @@ let neverHasSideEffects = 1 in {
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
- defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
- memopv8f32, X86Fmadd, v4f32, v8f32>;
- defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
- memopv8f32, X86Fmsub, v4f32, v8f32>;
+ defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", loadv4f32,
+ loadv8f32, X86Fmadd, v4f32, v8f32>;
+ defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", loadv4f32,
+ loadv8f32, X86Fmsub, v4f32, v8f32>;
defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
- memopv4f32, memopv8f32, X86Fmaddsub,
+ loadv4f32, loadv8f32, X86Fmaddsub,
v4f32, v8f32>;
defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
- memopv4f32, memopv8f32, X86Fmsubadd,
+ loadv4f32, loadv8f32, X86Fmsubadd,
v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
- memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
- defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
- memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
+ defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", loadv2f64,
+ loadv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
+ defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", loadv2f64,
+ loadv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
- memopv2f64, memopv4f64, X86Fmaddsub,
+ loadv2f64, loadv4f64, X86Fmaddsub,
v2f64, v4f64>, VEX_W;
defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
- memopv2f64, memopv4f64, X86Fmsubadd,
+ loadv2f64, loadv4f64, X86Fmsubadd,
v2f64, v4f64>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
- defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
- memopv8f32, X86Fnmadd, v4f32, v8f32>;
- defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
- memopv8f32, X86Fnmsub, v4f32, v8f32>;
+ defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", loadv4f32,
+ loadv8f32, X86Fnmadd, v4f32, v8f32>;
+ defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", loadv4f32,
+ loadv8f32, X86Fnmsub, v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
- memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
+ defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", loadv2f64,
+ loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
- memopv2f64, memopv4f64, X86Fnmsub, v2f64,
+ loadv2f64, loadv4f64, X86Fnmsub, v2f64,
v4f64>, VEX_W;
}
@@ -206,25 +206,26 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4;
+ (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG, MemOp4;
def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3)))]>, VEX_W, MemOp4;
+ (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG, MemOp4;
def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
+ (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG;
// For disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in
def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
+ VEX_LIG;
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
@@ -235,19 +236,19 @@ multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG, MemOp4;
def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
- mem_cpat:$src3))]>, VEX_W, MemOp4;
+ mem_cpat:$src3))]>, VEX_W, VEX_LIG, MemOp4;
def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+ (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG;
}
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -338,31 +339,31 @@ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
let ExeDomain = SSEPackedSingle in {
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
}
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 64018b3..0fd9011 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -139,6 +139,7 @@ class T8XS { bits<5> Prefix = 18; }
class TAXD { bits<5> Prefix = 19; }
class XOP8 { bits<5> Prefix = 20; }
class XOP9 { bits<5> Prefix = 21; }
+class XOPA { bits<5> Prefix = 22; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -339,10 +340,11 @@ def __xd : XD;
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin> {
- let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512],
+ !if(hasVEXPrefix /* VEX */, [UseAVX],
!if(!eq(Prefix, __xs.Prefix), [UseSSE1],
!if(!eq(Prefix, __xd.Prefix), [UseSSE2],
- !if(hasOpSizePrefix, [UseSSE2], [UseSSE1]))));
+ !if(hasOpSizePrefix, [UseSSE2], [UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -352,8 +354,9 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin> {
- let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
+ let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512],
+ !if(hasVEXPrefix /* VEX */, [UseAVX],
+ !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -363,8 +366,9 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
InstrItinClass itin, Domain d>
: I<o, F, outs, ins, asm, pattern, itin, d> {
- let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
+ let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512],
+ !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -381,11 +385,12 @@ class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, itin, d> {
- let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
- !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
+ let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512],
+ !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
- let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
}
// SSE1 Instruction Templates:
@@ -460,7 +465,7 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
- Requires<[HasAVX]>;
+ Requires<[UseAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
@@ -472,7 +477,7 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, TB,
- OpSize, Requires<[HasAVX]>;
+ OpSize, Requires<[UseAVX]>;
class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB,
@@ -641,7 +646,7 @@ class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX512]>;
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
- : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>,
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TB,
Requires<[HasAVX512]>;
class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
@@ -653,10 +658,10 @@ class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX512]>;
class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d, InstrItinClass itin = NoItinerary>
- : Ii8<o, F, outs, ins, asm, pattern, itin, d>, Requires<[HasAVX512]>;
+ : Ii8<o, F, outs, ins, asm, pattern, itin, d>, TB, Requires<[HasAVX512]>;
class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d, InstrItinClass itin = NoItinerary>
- : I<o, F, outs, ins, asm, pattern, itin, d>, Requires<[HasAVX512]>;
+ : I<o, F, outs, ins, asm, pattern, itin, d>, TB, Requires<[HasAVX512]>;
class AVX512FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8,
@@ -667,7 +672,7 @@ class AVX512FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = NoItinerary>
+ list<dag>pattern, InstrItinClass itin = IIC_AES>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasAES]>;
@@ -767,6 +772,7 @@ class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// MMXI - MMX instructions with TB prefix.
+// MMXI32 - MMX instructions with TB prefix valid only in 32 bit mode.
// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
@@ -776,6 +782,9 @@ class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
+class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In32BitMode]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
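The nested !if chains added to SI, SIi8, PI and PIi8 simply pick an availability predicate from the encoding prefix. Paraphrasing the PI hunk above (no new behaviour, just the resolution order spelled out):

// EVEX-encoded          -> Predicates = [HasAVX512]
// VEX-encoded           -> Predicates = [HasAVX]   (UseAVX for SI/SIi8)
// OpSize (0x66) prefix  -> Predicates = [UseSSE2]
// otherwise             -> Predicates = [UseSSE1]
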
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0b51521..1fed424 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -105,6 +105,13 @@ def X86vsext : SDNode<"X86ISD::VSEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>]>>;
+def X86vtrunc : SDNode<"X86ISD::VTRUNC",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>]>>;
+def X86vtruncm : SDNode<"X86ISD::VTRUNCM",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVec<2>, SDTCisInt<2>]>>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>]>>;
@@ -118,6 +125,15 @@ def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
+def X86IntCmpMask : SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>]>;
+def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
+def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
+
+def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
+
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>]>>;
@@ -140,6 +156,9 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
+def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
@@ -151,6 +170,8 @@ def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
+def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>]>;
@@ -194,11 +215,14 @@ def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
+def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
+def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
@@ -236,13 +260,13 @@ def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem32AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
def sdmem : Operand<v2f64> {
let PrintMethod = "printf64mem";
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem64AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -262,9 +286,16 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
-// 128-/256-bit extload pattern fragments
+// 512-bit load pattern fragments
+def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
+def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
+def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
+def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
+
+// 128-/256-/512-bit extload pattern fragments
def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
+def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
@@ -278,6 +309,12 @@ def alignedstore256 : PatFrag<(ops node:$val, node:$ptr),
return cast<StoreSDNode>(N)->getAlignment() >= 32;
}]>;
+// Like 'store', but always requires 512-bit vector alignment.
+def alignedstore512 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 64;
+}]>;
+
// Like 'load', but always requires 128-bit vector alignment.
def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
@@ -293,6 +330,11 @@ def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 32;
}]>;
+// Like 'load', but always requires 512-bit vector alignment.
+def alignedload512 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 64;
+}]>;
+
def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
@@ -316,6 +358,16 @@ def alignedloadv4f64 : PatFrag<(ops node:$ptr),
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
(v4i64 (alignedload256 node:$ptr))>;
+// 512-bit aligned load pattern fragments
+def alignedloadv16f32 : PatFrag<(ops node:$ptr),
+ (v16f32 (alignedload512 node:$ptr))>;
+def alignedloadv16i32 : PatFrag<(ops node:$ptr),
+ (v16i32 (alignedload512 node:$ptr))>;
+def alignedloadv8f64 : PatFrag<(ops node:$ptr),
+ (v8f64 (alignedload512 node:$ptr))>;
+def alignedloadv8i64 : PatFrag<(ops node:$ptr),
+ (v8i64 (alignedload512 node:$ptr))>;
+
// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others. If the subtarget
@@ -327,6 +379,16 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+def memop4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def memop8 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
@@ -342,6 +404,12 @@ def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+// 512-bit memop pattern fragments
+def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop4 node:$ptr))>;
+def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop8 node:$ptr))>;
+def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop4 node:$ptr))>;
+def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop8 node:$ptr))>;
+
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
// FIXME: 8 byte alignment for mmx reads is not required
@@ -391,6 +459,10 @@ def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
+// 512-bit bitconvert pattern fragments
+def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
+def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
+
def vzmovl_v2i64 : PatFrag<(ops node:$src),
(bitconvert (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
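The new 512-bit fragments compose with selection patterns in the same way as the existing 128-/256-bit ones. A hypothetical pattern using them might look like the following (for illustration only; the real AVX-512 load patterns live in X86InstrAVX512.td):

// Sketch: an aligned v16i32 load selects the 512-bit aligned integer move.
def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>;
// A merely 4-byte-aligned load would instead match memopv16i32 and pick the
// unaligned form, VMOVDQU32rm.
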
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 0443a93..2461773 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -35,7 +36,7 @@
#include "llvm/Target/TargetOptions.h"
#include <limits>
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "X86GenInstrInfo.inc"
using namespace llvm;
@@ -81,6 +82,7 @@ enum {
TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_64 = 64 << TB_ALIGN_SHIFT,
TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
};
@@ -90,6 +92,9 @@ struct X86OpTblEntry {
uint16_t Flags;
};
+// Pin the vtable to this file.
+void X86InstrInfo::anchor() {}
+
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
@@ -298,8 +303,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
{ X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
{ X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
- { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
{ X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
{ X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
@@ -356,8 +359,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
// AVX 128-bit versions of foldable instructions
{ X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
- { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -374,7 +375,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
- { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
+ // AVX-512 foldable instructions
+ { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -400,8 +403,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
{ X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
{ X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDrm, TB_NO_REVERSE },
- { X86::FsMOVAPSrr, X86::MOVSSrm, TB_NO_REVERSE },
{ X86::IMUL16rri, X86::IMUL16rmi, 0 },
{ X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
{ X86::IMUL32rri, X86::IMUL32rmi, 0 },
@@ -444,7 +445,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
{ X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
{ X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
{ X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
{ X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
@@ -493,8 +493,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
{ X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
{ X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
- { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
- { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
{ X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
@@ -507,7 +505,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
{ X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
- { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::VPABSBrr128, X86::VPABSBrm128, 0 },
@@ -552,11 +549,27 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- // BMI/BMI2/LZCNT/POPCNT foldable instructions
+ // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
{ X86::BEXTR32rr, X86::BEXTR32rm, 0 },
{ X86::BEXTR64rr, X86::BEXTR64rm, 0 },
+ { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 },
+ { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 },
+ { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 },
+ { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 },
+ { X86::BLCI32rr, X86::BLCI32rm, 0 },
+ { X86::BLCI64rr, X86::BLCI64rm, 0 },
+ { X86::BLCIC32rr, X86::BLCIC32rm, 0 },
+ { X86::BLCIC64rr, X86::BLCIC64rm, 0 },
+ { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 },
+ { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 },
+ { X86::BLCS32rr, X86::BLCS32rm, 0 },
+ { X86::BLCS64rr, X86::BLCS64rm, 0 },
+ { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 },
+ { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 },
{ X86::BLSI32rr, X86::BLSI32rm, 0 },
{ X86::BLSI64rr, X86::BLSI64rm, 0 },
+ { X86::BLSIC32rr, X86::BLSIC32rm, 0 },
+ { X86::BLSIC64rr, X86::BLSIC64rm, 0 },
{ X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
{ X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
{ X86::BLSR32rr, X86::BLSR32rm, 0 },
@@ -577,9 +590,27 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::SHRX64rr, X86::SHRX64rm, 0 },
{ X86::SHLX32rr, X86::SHLX32rm, 0 },
{ X86::SHLX64rr, X86::SHLX64rm, 0 },
+ { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 },
+ { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 },
{ X86::TZCNT16rr, X86::TZCNT16rm, 0 },
{ X86::TZCNT32rr, X86::TZCNT32rm, 0 },
{ X86::TZCNT64rr, X86::TZCNT64rm, 0 },
+ { X86::TZMSK32rr, X86::TZMSK32rm, 0 },
+ { X86::TZMSK64rr, X86::TZMSK64rm, 0 },
+
+ // AVX-512 foldable instructions
+ { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
+ { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
+ { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 },
+ { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
+ { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
+ { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
+
+ // AES foldable instructions
+ { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
+ { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
+ { X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 },
+ { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1177,6 +1208,52 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PDEP64rr, X86::PDEP64rm, 0 },
{ X86::PEXT32rr, X86::PEXT32rm, 0 },
{ X86::PEXT64rr, X86::PEXT64rm, 0 },
+
+ // AVX-512 foldable instructions
+ { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
+ { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
+ { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
+ { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
+ { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
+ { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
+ { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
+ { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
+ { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
+ { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
+ { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
+ { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
+ { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
+ { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
+ { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
+ { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
+ { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
+ { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
+ { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
+ { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
+ { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+ { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
+ { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
+ { X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
+ { X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
+
+ // AES foldable instructions
+ { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
+ { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
+ { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
+ { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
+ { X86::VAESDECLASTrr, X86::VAESDECLASTrm, TB_ALIGN_16 },
+ { X86::VAESDECrr, X86::VAESDECrm, TB_ALIGN_16 },
+ { X86::VAESENCLASTrr, X86::VAESENCLASTrm, TB_ALIGN_16 },
+ { X86::VAESENCrr, X86::VAESENCrm, TB_ALIGN_16 },
+
+ // SHA foldable instructions
+ { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
+ { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 },
+ { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 },
+ { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
+ { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
+ { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
+ { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -1338,6 +1415,11 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
+ // AVX-512 VPERMI instructions with 3 source operands.
+ { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
+ { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
+ { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
+ { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1454,6 +1536,8 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
+ case X86::VMOVDQA32rm:
+ case X86::VMOVDQA64rm:
return true;
}
}
@@ -2890,23 +2974,29 @@ static bool isHReg(unsigned Reg) {
// Try and copy between VR128/VR64 and GR64 registers.
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
- bool HasAVX) {
+ const X86Subtarget& Subtarget) {
+
+
// SrcReg(VR128) -> DestReg(GR64)
// SrcReg(VR64) -> DestReg(GR64)
// SrcReg(GR64) -> DestReg(VR128)
// SrcReg(GR64) -> DestReg(VR64)
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
if (X86::GR64RegClass.contains(DestReg)) {
- if (X86::VR128RegClass.contains(SrcReg))
+ if (X86::VR128XRegClass.contains(SrcReg))
// Copy from a VR128 register to a GR64 register.
- return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
+ return HasAVX512 ? X86::VMOVPQIto64Zrr: (HasAVX ? X86::VMOVPQIto64rr :
+ X86::MOVPQIto64rr);
if (X86::VR64RegClass.contains(SrcReg))
// Copy from a VR64 register to a GR64 register.
return X86::MOVSDto64rr;
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
- if (X86::VR128RegClass.contains(DestReg))
- return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
+ if (X86::VR128XRegClass.contains(DestReg))
+ return HasAVX512 ? X86::VMOV64toPQIZrr: (HasAVX ? X86::VMOV64toPQIrr :
+ X86::MOV64toPQIrr);
// Copy from a GR64 register to a VR64 register.
if (X86::VR64RegClass.contains(DestReg))
return X86::MOV64toSDrr;
@@ -2915,14 +3005,30 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// SrcReg(FR32) -> DestReg(GR32)
// SrcReg(GR32) -> DestReg(FR32)
- if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
+ if (X86::GR32RegClass.contains(DestReg) && X86::FR32XRegClass.contains(SrcReg))
// Copy from a FR32 register to a GR32 register.
- return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+ return HasAVX512 ? X86::VMOVSS2DIZrr : (HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr);
- if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ if (X86::FR32XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
// Copy from a GR32 register to a FR32 register.
- return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+ return HasAVX512 ? X86::VMOVDI2SSZrr : (HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr);
+ return 0;
+}
+static
+unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
+ if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
+ X86::VR256XRegClass.contains(DestReg, SrcReg) ||
+ X86::VR512RegClass.contains(DestReg, SrcReg)) {
+ DestReg = get512BitSuperRegister(DestReg);
+ SrcReg = get512BitSuperRegister(SrcReg);
+ return X86::VMOVAPSZrr;
+ }
+ if ((X86::VK8RegClass.contains(DestReg) ||
+ X86::VK16RegClass.contains(DestReg)) &&
+ (X86::VK8RegClass.contains(SrcReg) ||
+ X86::VK16RegClass.contains(SrcReg)))
+ return X86::KMOVWkk;
return 0;
}
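A hedged note on the helper above (not part of the patch): VR128X/VR256X include the new xmm16-xmm31/ymm16-ymm31 registers, which the VEX-encoded VMOVAPSrr/VMOVAPSYrr copies cannot encode, so (presumably until EVEX-encoded 128/256-bit moves are defined) the copy is widened to the containing ZMM pair via get512BitSuperRegister and emitted as one EVEX vmovaps, while mask-register copies go through kmovw. For example:

  %XMM17 = COPY %XMM1   ->  vmovaps %zmm1, %zmm17
  %K2    = COPY %K1     ->  kmovw   %k1, %k2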
@@ -2932,7 +3038,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// First deal with the normal symmetric copies.
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- unsigned Opc;
+ bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+ unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
else if (X86::GR32RegClass.contains(DestReg, SrcReg))
@@ -2950,14 +3057,17 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
"8-bit H register can not be copied outside GR8_NOREX");
} else
Opc = X86::MOV8rr;
- } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ }
+ else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+ else if (HasAVX512)
+ Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg);
+ else if (X86::VR128RegClass.contains(DestReg, SrcReg))
Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
Opc = X86::VMOVAPSYrr;
- else if (X86::VR64RegClass.contains(DestReg, SrcReg))
- Opc = X86::MMX_MOVQ64rr;
- else
- Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX);
+ if (!Opc)
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget<X86Subtarget>());
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -3005,6 +3115,18 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
bool isStackAligned,
const TargetMachine &TM,
bool load) {
+ if (TM.getSubtarget<X86Subtarget>().hasAVX512()) {
+ if (X86::VK8RegClass.hasSubClassEq(RC) ||
+ X86::VK16RegClass.hasSubClassEq(RC))
+ return load ? X86::KMOVWkm : X86::KMOVWmk;
+ if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
+ return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
+ if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
+ return load ? X86::VMOVSDZrm : X86::VMOVSDZmr;
+ if (X86::VR512RegClass.hasSubClassEq(RC))
+ return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
+ }
+
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (RC->getSize()) {
default:
@@ -3046,7 +3168,8 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
case 16: {
- assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
+ assert((X86::VR128RegClass.hasSubClassEq(RC) ||
+ X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass");
// If stack is realigned we can use aligned stores.
if (isStackAligned)
return load ?
@@ -3058,12 +3181,19 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
(HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
}
case 32:
- assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+ assert((X86::VR256RegClass.hasSubClassEq(RC) ||
+ X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass");
// If stack is realigned we can use aligned stores.
if (isStackAligned)
return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
else
return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
+ case 64:
+ assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
+ if (isStackAligned)
+ return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
+ else
+ return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}
}
@@ -3090,7 +3220,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFunction &MF = *MBB.getParent();
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
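The change above (repeated in the three hunks that follow for storeRegToAddr, loadRegFromStackSlot and loadRegFromAddr) generalizes the old "RC->getSize() == 32 ? 32 : 16" rule so 64-byte ZMM register classes get 64-byte spill slots. A minimal sketch of the new rule with typical register-class sizes; the helper name is illustrative and not part of the patch:

#include <algorithm>
#include <cstdint>

// Spill-slot alignment = max(register-class size in bytes, 16).
static uint32_t spillSlotAlignment(uint32_t RCSize) {
  return std::max<uint32_t>(RCSize, 16);
}
// spillSlotAlignment(8)  == 16   // GR64: unchanged
// spillSlotAlignment(16) == 16   // XMM:  unchanged
// spillSlotAlignment(32) == 32   // YMM:  same result as the old rule
// spillSlotAlignment(64) == 64   // ZMM:  new; the old rule gave only 16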
@@ -3106,7 +3236,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
@@ -3126,7 +3256,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -3140,7 +3270,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -3722,6 +3852,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::AVX_SET0:
assert(HasAVX && "AVX not supported");
return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
+ case X86::AVX512_512_SET0:
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
case X86::V_SETALLONES:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
@@ -3729,6 +3861,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
+ case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
+ case X86::KSET1B:
+ case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
}
return false;
}
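For reference (derived from the cases above, not additional patch content): Expand2AddrUndef rewrites each of these pseudos as the corresponding two-address idiom with undef inputs, so the constant is materialized without reading stale register contents:

  AVX512_512_SET0   ->  vpxord %zmmN, %zmmN, %zmmN    (512-bit zero idiom)
  KSET0W            ->  kxorw  %kN, %kN, %kN          (mask of all zeros)
  KSET1B / KSET1W   ->  kxnorw %kN, %kN, %kN          (mask of all ones)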
@@ -3942,18 +4077,6 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
case X86::SQRTSSr_Int:
- // AVX encoded versions
- case X86::VCVTSD2SSrr:
- case X86::Int_VCVTSD2SSrr:
- case X86::VCVTSS2SDrr:
- case X86::Int_VCVTSS2SDrr:
- case X86::VRCPSSr:
- case X86::VROUNDSDr:
- case X86::VROUNDSDr_Int:
- case X86::VROUNDSSr:
- case X86::VROUNDSSr_Int:
- case X86::VRSQRTSSr:
- case X86::VSQRTSSr:
return true;
}
@@ -3985,10 +4108,77 @@ getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
return 16;
}
+// Return true for any instruction that copies the high bits of the first source

+// operand into the unused high bits of the destination operand.
+static bool hasUndefRegUpdate(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::VCVTSI2SSrr:
+ case X86::Int_VCVTSI2SSrr:
+ case X86::VCVTSI2SS64rr:
+ case X86::Int_VCVTSI2SS64rr:
+ case X86::VCVTSI2SDrr:
+ case X86::Int_VCVTSI2SDrr:
+ case X86::VCVTSI2SD64rr:
+ case X86::Int_VCVTSI2SD64rr:
+ case X86::VCVTSD2SSrr:
+ case X86::Int_VCVTSD2SSrr:
+ case X86::VCVTSS2SDrr:
+ case X86::Int_VCVTSS2SDrr:
+ case X86::VRCPSSr:
+ case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
+ case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
+ case X86::VRSQRTSSr:
+ case X86::VSQRTSSr:
+
+ // AVX-512
+ case X86::VCVTSD2SSZrr:
+ case X86::VCVTSS2SDZrr:
+ return true;
+ }
+
+ return false;
+}
+
+/// Inform the ExeDepsFix pass how many idle instructions we would like before
+/// certain undef register reads.
+///
+/// This catches the VCVTSI2SD family of instructions:
+///
+/// vcvtsi2sdq %rax, %xmm0<undef>, %xmm14
+///
+/// We should be careful *not* to catch VXOR idioms, which are presumably
+/// handled specially in the pipeline:
+///
+/// vxorps %xmm1<undef>, %xmm1<undef>, %xmm1
+///
+/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
+/// high bits that are passed-through are not live.
+unsigned X86InstrInfo::
+getUndefRegClearance(const MachineInstr *MI, unsigned &OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (!hasUndefRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // Set the OpNum parameter to the first source operand.
+ OpNum = 1;
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ // Use the same magic number as getPartialRegUpdateClearance.
+ return 16;
+ }
+ return 0;
+}
+
void X86InstrInfo::
breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const {
unsigned Reg = MI->getOperand(OpNum).getReg();
+ // If MI kills this register, the false dependence is already broken.
+ if (MI->killsRegister(Reg, TRI))
+ return;
if (X86::VR128RegClass.contains(Reg)) {
// These instructions are all floating point domain, so xorps is the best
// choice.
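How the new getUndefRegClearance hook and the kill-register early return below fit together is easiest to see from the caller's side. The sketch that follows is a simplified, hypothetical consumer in the spirit of the execution-dependency-fix pass; the names and control flow are illustrative only and not part of the patch:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Illustrative consumer of the two hooks (simplified; not the real pass).
static void maybeBreakUndefRead(const TargetInstrInfo &TII,
                                const TargetRegisterInfo &TRI,
                                MachineInstr *MI,
                                unsigned InstrsSinceLastWrite) {
  unsigned OpNum = 0;
  // Ask the target how much clearance it wants before this undef read.
  unsigned Pref = TII.getUndefRegClearance(MI, OpNum, &TRI);
  if (Pref && InstrsSinceLastWrite < Pref)
    // Too close to the last write of the register: break the false dependence
    // (on x86 this inserts an xorps/pxor). If MI already kills the register,
    // the new early return in breakPartialRegDependency makes this a no-op.
    TII.breakPartialRegDependency(MI, OpNum, &TRI);
}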
@@ -4008,10 +4198,75 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
MI->addRegisterKilled(Reg, TRI, true);
}
-MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+static MachineInstr* foldPatchpoint(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex,
+ const TargetInstrInfo &TII) {
+ unsigned StartIdx = 0;
+ switch (MI->getOpcode()) {
+ case TargetOpcode::STACKMAP:
+ StartIdx = 2; // Skip ID, nShadowBytes.
+ break;
+ case TargetOpcode::PATCHPOINT: {
+ // For PatchPoint, the call args are not foldable.
+ PatchPointOpers opers(MI);
+ StartIdx = opers.getVarIdx();
+ break;
+ }
+ default:
+ llvm_unreachable("unexpected stackmap opcode");
+ }
+
+  // Return 0 (no fold) if any operand requested for folding is not foldable
+  // (i.e. not part of the stackmap's live values).
+ for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
+ I != E; ++I) {
+ if (*I < StartIdx)
+ return 0;
+ }
+
+ MachineInstr *NewMI =
+ MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+
+  // No need to fold the return value, the metadata, or the function arguments.
+ for (unsigned i = 0; i < StartIdx; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
+ assert(MO.getReg() && "patchpoint can only fold a vreg operand");
+ // Compute the spill slot size and offset.
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg());
+ unsigned SpillSize;
+ unsigned SpillOffset;
+ bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
+ SpillOffset, &MF.getTarget());
+ if (!Valid)
+ report_fatal_error("cannot spill patchpoint subregister operand");
+
+ MIB.addOperand(MachineOperand::CreateImm(StackMaps::IndirectMemRefOp));
+ MIB.addOperand(MachineOperand::CreateImm(SpillSize));
+ MIB.addOperand(MachineOperand::CreateFI(FrameIndex));
+ addOffset(MIB, SpillOffset);
+ }
+ else
+ MIB.addOperand(MO);
+ }
+ return NewMI;
+}
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // Special case stack map and patch point intrinsics.
+ if (MI->getOpcode() == TargetOpcode::STACKMAP
+ || MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ return foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
+ }
// Check switch flag
if (NoFusing) return NULL;
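For reference (this follows directly from foldPatchpoint above; it is not additional patch content): a folded live-value register operand of a STACKMAP/PATCHPOINT is replaced by the indirect-memory four-tuple that the stackmap emitter serializes:

  before folding:  ..., %vreg, ...
  after  folding:  ..., StackMaps::IndirectMemRefOp, <spill size>,
                        <frame index>, <offset>, ...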
@@ -4025,6 +4280,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ // If the function stack isn't realigned we don't want to fold instructions
+ // that need increased alignment.
+ if (!RI.needsStackRealignment(MF))
+ Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -4054,6 +4313,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
+ // If loading from a FrameIndex, fold directly from the FrameIndex.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ int FrameIndex;
+ if (isLoadFromStackSlot(LoadMI, FrameIndex))
+ return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+
// Check switch flag
if (NoFusing) return NULL;
@@ -4179,7 +4444,6 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NULL;
// Folding a normal load. Just copy the load's address operands.
- unsigned NumOps = LoadMI->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
MOs.push_back(LoadMI->getOperand(i));
break;
@@ -5001,6 +5265,37 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VSQRTSSm:
case X86::VSQRTSSm_Int:
case X86::VSQRTSSr:
+ case X86::VSQRTPDZrm:
+ case X86::VSQRTPDZrr:
+ case X86::VSQRTPSZrm:
+ case X86::VSQRTPSZrr:
+ case X86::VSQRTSDZm:
+ case X86::VSQRTSDZm_Int:
+ case X86::VSQRTSDZr:
+ case X86::VSQRTSSZm_Int:
+ case X86::VSQRTSSZr:
+ case X86::VSQRTSSZm:
+ case X86::VDIVSDZrm:
+ case X86::VDIVSDZrr:
+ case X86::VDIVSSZrm:
+ case X86::VDIVSSZrr:
+
+ case X86::VGATHERQPSZrm:
+ case X86::VGATHERQPDZrm:
+ case X86::VGATHERDPDZrm:
+ case X86::VGATHERDPSZrm:
+ case X86::VPGATHERQDZrm:
+ case X86::VPGATHERQQZrm:
+ case X86::VPGATHERDDZrm:
+ case X86::VPGATHERDQZrm:
+ case X86::VSCATTERQPDZmr:
+ case X86::VSCATTERQPSZmr:
+ case X86::VSCATTERDPDZmr:
+ case X86::VSCATTERDPSZmr:
+ case X86::VPSCATTERQDZmr:
+ case X86::VPSCATTERQQZmr:
+ case X86::VPSCATTERDDZmr:
+ case X86::VPSCATTERDQZmr:
return true;
}
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index a0d1ba7..600e392 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -152,6 +152,8 @@ class X86InstrInfo : public X86GenInstrInfo {
MemOp2RegOpTableType &M2RTable,
unsigned RegOp, unsigned MemOp, unsigned Flags);
+ virtual void anchor();
+
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -369,6 +371,8 @@ public:
unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const;
+ unsigned getUndefRegClearance(const MachineInstr *MI, unsigned &OpNum,
+ const TargetRegisterInfo *TRI) const;
void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0960a2a..6e5d543 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -248,11 +248,12 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
-def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>;
def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>;
+def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntShiftOp>;
+def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -278,53 +279,52 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
// *mem - Operand definitions for the funky X86 addressing mode operands.
//
-def X86MemAsmOperand : AsmOperandClass {
- let Name = "Mem"; let PredicateMethod = "isMem";
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
}
-def X86Mem8AsmOperand : AsmOperandClass {
- let Name = "Mem8"; let PredicateMethod = "isMem8";
+def X86Mem8AsmOperand : AsmOperandClass {
+ let Name = "Mem8"; let RenderMethod = "addMemOperands";
}
-def X86Mem16AsmOperand : AsmOperandClass {
- let Name = "Mem16"; let PredicateMethod = "isMem16";
+def X86Mem16AsmOperand : AsmOperandClass {
+ let Name = "Mem16"; let RenderMethod = "addMemOperands";
}
-def X86Mem32AsmOperand : AsmOperandClass {
- let Name = "Mem32"; let PredicateMethod = "isMem32";
+def X86Mem32AsmOperand : AsmOperandClass {
+ let Name = "Mem32"; let RenderMethod = "addMemOperands";
}
-def X86Mem64AsmOperand : AsmOperandClass {
- let Name = "Mem64"; let PredicateMethod = "isMem64";
+def X86Mem64AsmOperand : AsmOperandClass {
+ let Name = "Mem64"; let RenderMethod = "addMemOperands";
}
-def X86Mem80AsmOperand : AsmOperandClass {
- let Name = "Mem80"; let PredicateMethod = "isMem80";
+def X86Mem80AsmOperand : AsmOperandClass {
+ let Name = "Mem80"; let RenderMethod = "addMemOperands";
}
-def X86Mem128AsmOperand : AsmOperandClass {
- let Name = "Mem128"; let PredicateMethod = "isMem128";
+def X86Mem128AsmOperand : AsmOperandClass {
+ let Name = "Mem128"; let RenderMethod = "addMemOperands";
}
-def X86Mem256AsmOperand : AsmOperandClass {
- let Name = "Mem256"; let PredicateMethod = "isMem256";
+def X86Mem256AsmOperand : AsmOperandClass {
+ let Name = "Mem256"; let RenderMethod = "addMemOperands";
+}
+def X86Mem512AsmOperand : AsmOperandClass {
+ let Name = "Mem512"; let RenderMethod = "addMemOperands";
}
// Gather mem operands
def X86MemVX32Operand : AsmOperandClass {
- let Name = "MemVX32"; let PredicateMethod = "isMemVX32";
+ let Name = "MemVX32"; let RenderMethod = "addMemOperands";
}
def X86MemVY32Operand : AsmOperandClass {
- let Name = "MemVY32"; let PredicateMethod = "isMemVY32";
+ let Name = "MemVY32"; let RenderMethod = "addMemOperands";
+}
+def X86MemVZ32Operand : AsmOperandClass {
+ let Name = "MemVZ32"; let RenderMethod = "addMemOperands";
}
def X86MemVX64Operand : AsmOperandClass {
- let Name = "MemVX64"; let PredicateMethod = "isMemVX64";
+ let Name = "MemVX64"; let RenderMethod = "addMemOperands";
}
def X86MemVY64Operand : AsmOperandClass {
- let Name = "MemVY64"; let PredicateMethod = "isMemVY64";
+ let Name = "MemVY64"; let RenderMethod = "addMemOperands";
}
-
def X86MemVZ64Operand : AsmOperandClass {
- let Name = "MemVZ64"; let PredicateMethod = "isMemVZ64";
-}
-def X86MemVZ32Operand : AsmOperandClass {
- let Name = "MemVZ32"; let PredicateMethod = "isMemVZ32";
-}
-def X86Mem512AsmOperand : AsmOperandClass {
- let Name = "Mem512"; let PredicateMethod = "isMem512";
+ let Name = "MemVZ64"; let RenderMethod = "addMemOperands";
}
def X86AbsMemAsmOperand : AsmOperandClass {
@@ -343,29 +343,29 @@ def opaque48mem : X86MemOperand<"printopaquemem">;
def opaque80mem : X86MemOperand<"printopaquemem">;
def opaque512mem : X86MemOperand<"printopaquemem">;
-def i8mem : X86MemOperand<"printi8mem"> {
+def i8mem : X86MemOperand<"printi8mem"> {
let ParserMatchClass = X86Mem8AsmOperand; }
-def i16mem : X86MemOperand<"printi16mem"> {
+def i16mem : X86MemOperand<"printi16mem"> {
let ParserMatchClass = X86Mem16AsmOperand; }
-def i32mem : X86MemOperand<"printi32mem"> {
+def i32mem : X86MemOperand<"printi32mem"> {
let ParserMatchClass = X86Mem32AsmOperand; }
-def i64mem : X86MemOperand<"printi64mem"> {
+def i64mem : X86MemOperand<"printi64mem"> {
let ParserMatchClass = X86Mem64AsmOperand; }
-def i128mem : X86MemOperand<"printi128mem"> {
+def i128mem : X86MemOperand<"printi128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
-def i256mem : X86MemOperand<"printi256mem"> {
+def i256mem : X86MemOperand<"printi256mem"> {
let ParserMatchClass = X86Mem256AsmOperand; }
-def i512mem : X86MemOperand<"printi512mem"> {
+def i512mem : X86MemOperand<"printi512mem"> {
let ParserMatchClass = X86Mem512AsmOperand; }
-def f32mem : X86MemOperand<"printf32mem"> {
+def f32mem : X86MemOperand<"printf32mem"> {
let ParserMatchClass = X86Mem32AsmOperand; }
-def f64mem : X86MemOperand<"printf64mem"> {
+def f64mem : X86MemOperand<"printf64mem"> {
let ParserMatchClass = X86Mem64AsmOperand; }
-def f80mem : X86MemOperand<"printf80mem"> {
+def f80mem : X86MemOperand<"printf80mem"> {
let ParserMatchClass = X86Mem80AsmOperand; }
-def f128mem : X86MemOperand<"printf128mem"> {
+def f128mem : X86MemOperand<"printf128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
-def f256mem : X86MemOperand<"printf256mem">{
+def f256mem : X86MemOperand<"printf256mem">{
let ParserMatchClass = X86Mem256AsmOperand; }
def f512mem : X86MemOperand<"printf512mem">{
let ParserMatchClass = X86Mem512AsmOperand; }
@@ -439,17 +439,49 @@ let OperandType = "OPERAND_PCREL",
def i32imm_pcrel : Operand<i32>;
def i16imm_pcrel : Operand<i16>;
-def offset8 : Operand<i64>;
-def offset16 : Operand<i64>;
-def offset32 : Operand<i64>;
-def offset64 : Operand<i64>;
-
// Branch targets have OtherVT type and print as pc-relative values.
def brtarget : Operand<OtherVT>;
def brtarget8 : Operand<OtherVT>;
}
+def X86MemOffs8AsmOperand : AsmOperandClass {
+ let Name = "MemOffs8";
+ let RenderMethod = "addMemOffsOperands";
+ let SuperClasses = [X86Mem8AsmOperand];
+}
+def X86MemOffs16AsmOperand : AsmOperandClass {
+ let Name = "MemOffs16";
+ let RenderMethod = "addMemOffsOperands";
+ let SuperClasses = [X86Mem16AsmOperand];
+}
+def X86MemOffs32AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32";
+ let RenderMethod = "addMemOffsOperands";
+ let SuperClasses = [X86Mem32AsmOperand];
+}
+def X86MemOffs64AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64";
+ let RenderMethod = "addMemOffsOperands";
+ let SuperClasses = [X86Mem64AsmOperand];
+}
+
+let OperandType = "OPERAND_MEMORY" in {
+def offset8 : Operand<i64> {
+ let ParserMatchClass = X86MemOffs8AsmOperand;
+ let PrintMethod = "printMemOffs8"; }
+def offset16 : Operand<i64> {
+ let ParserMatchClass = X86MemOffs16AsmOperand;
+ let PrintMethod = "printMemOffs16"; }
+def offset32 : Operand<i64> {
+ let ParserMatchClass = X86MemOffs32AsmOperand;
+ let PrintMethod = "printMemOffs32"; }
+def offset64 : Operand<i64> {
+ let ParserMatchClass = X86MemOffs64AsmOperand;
+ let PrintMethod = "printMemOffs64"; }
+}
+
+
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
let OperandType = "OPERAND_IMMEDIATE";
@@ -470,6 +502,14 @@ class ImmZExtAsmOperandClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
+def X86GR32orGR64AsmOperand : AsmOperandClass {
+ let Name = "GR32orGR64";
+}
+
+def GR32orGR64 : RegisterOperand<GR32> {
+ let ParserMatchClass = X86GR32orGR64AsmOperand;
+}
+
// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
@@ -617,13 +657,13 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
-def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
+def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasPFI : Predicate<"Subtarget->hasPFI()">;
-def HasEMI : Predicate<"Subtarget->hasERI()">;
+def HasERI : Predicate<"Subtarget->hasERI()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
@@ -632,6 +672,7 @@ def HasFMA : Predicate<"Subtarget->hasFMA()">;
def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def HasXOP : Predicate<"Subtarget->hasXOP()">;
+def HasTBM : Predicate<"Subtarget->hasTBM()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
@@ -643,9 +684,10 @@ def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasHLE : Predicate<"Subtarget->hasHLE()">;
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
+def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
-def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">;
+def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@@ -944,53 +986,56 @@ let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
- IIC_BSF>, TB, OpSize, Sched<[WriteShift]>;
+ IIC_BIT_SCAN_REG>, TB, OpSize, Sched<[WriteShift]>;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
- IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, OpSize, Sched<[WriteShiftLd]>;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB,
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))],
+ IIC_BIT_SCAN_REG>, TB,
Sched<[WriteShift]>;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
- IIC_BSF>, TB, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
- IIC_BSF>, TB, Sched<[WriteShift]>;
+ IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
- IIC_BSF>, TB, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>,
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))],
+ IIC_BIT_SCAN_REG>,
TB, OpSize, Sched<[WriteShift]>;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
- IIC_BSR>, TB,
+ IIC_BIT_SCAN_MEM>, TB,
OpSize, Sched<[WriteShiftLd]>;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB,
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))],
+ IIC_BIT_SCAN_REG>, TB,
Sched<[WriteShift]>;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
- IIC_BSR>, TB, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB,
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BIT_SCAN_REG>, TB,
Sched<[WriteShift]>;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
- IIC_BSR>, TB, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
} // Defs = [EFLAGS]
let SchedRW = [WriteMicrocoded] in {
@@ -1072,9 +1117,12 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
} // SchedRW
+let hasSideEffects = 0 in {
+
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
let SchedRW = [WriteALU] in {
+let mayLoad = 1 in {
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1084,6 +1132,8 @@ def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
"mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
+}
+let mayStore = 1 in {
def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
"mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1094,34 +1144,40 @@ def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
}
+}
// These forms all have full 64-bit absolute addresses in their instructions
// and use the movabs mnemonic to indicate this specific form.
-def MOV64o8a : RIi64_NOREX<0xA0, RawFrm, (outs), (ins offset64:$src),
+let mayLoad = 1 in {
+def MOV64o8a : RIi64_NOREX<0xA0, RawFrm, (outs), (ins offset8:$src),
"movabs{b}\t{$src, %al|al, $src}", []>,
Requires<[In64BitMode]>;
-def MOV64o16a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset64:$src),
+def MOV64o16a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset16:$src),
"movabs{w}\t{$src, %ax|ax, $src}", []>, OpSize,
Requires<[In64BitMode]>;
-def MOV64o32a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset64:$src),
+def MOV64o32a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset32:$src),
"movabs{l}\t{$src, %eax|eax, $src}", []>,
Requires<[In64BitMode]>;
def MOV64o64a : RIi64<0xA1, RawFrm, (outs), (ins offset64:$src),
"movabs{q}\t{$src, %rax|rax, $src}", []>,
Requires<[In64BitMode]>;
+}
-def MOV64ao8 : RIi64_NOREX<0xA2, RawFrm, (outs offset64:$dst), (ins),
+let mayStore = 1 in {
+def MOV64ao8 : RIi64_NOREX<0xA2, RawFrm, (outs offset8:$dst), (ins),
"movabs{b}\t{%al, $dst|$dst, al}", []>,
Requires<[In64BitMode]>;
-def MOV64ao16 : RIi64_NOREX<0xA3, RawFrm, (outs offset64:$dst), (ins),
+def MOV64ao16 : RIi64_NOREX<0xA3, RawFrm, (outs offset16:$dst), (ins),
"movabs{w}\t{%ax, $dst|$dst, ax}", []>, OpSize,
Requires<[In64BitMode]>;
-def MOV64ao32 : RIi64_NOREX<0xA3, RawFrm, (outs offset64:$dst), (ins),
+def MOV64ao32 : RIi64_NOREX<0xA3, RawFrm, (outs offset32:$dst), (ins),
"movabs{l}\t{%eax, $dst|$dst, eax}", []>,
Requires<[In64BitMode]>;
def MOV64ao64 : RIi64<0xA3, RawFrm, (outs offset64:$dst), (ins),
"movabs{q}\t{%rax, $dst|$dst, rax}", []>,
Requires<[In64BitMode]>;
+}
+} // hasSideEffects = 0
let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
@@ -1173,7 +1229,7 @@ def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
Sched<[WriteMove]>;
-let mayStore = 1 in
+let mayStore = 1, neverHasSideEffects = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
@@ -1814,6 +1870,30 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in {
int_x86_bmi_bzhi_64, loadi64>, VEX_W;
}
+def : Pat<(X86bzhi GR32:$src1, GR8:$src2),
+ (BZHI32rr GR32:$src1,
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(X86bzhi (loadi32 addr:$src1), GR8:$src2),
+ (BZHI32rm addr:$src1,
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(X86bzhi GR64:$src1, GR8:$src2),
+ (BZHI64rr GR64:$src1,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(X86bzhi (loadi64 addr:$src1), GR8:$src2),
+ (BZHI64rm addr:$src1,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+let Predicates = [HasBMI] in {
+ def : Pat<(X86bextr GR32:$src1, GR32:$src2),
+ (BEXTR32rr GR32:$src1, GR32:$src2)>;
+ def : Pat<(X86bextr (loadi32 addr:$src1), GR32:$src2),
+ (BEXTR32rm addr:$src1, GR32:$src2)>;
+ def : Pat<(X86bextr GR64:$src1, GR64:$src2),
+ (BEXTR64rr GR64:$src1, GR64:$src2)>;
+ def : Pat<(X86bextr (loadi64 addr:$src1), GR64:$src2),
+ (BEXTR64rm addr:$src1, GR64:$src2)>;
+} // HasBMI
+
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {
@@ -1838,6 +1918,134 @@ let Predicates = [HasBMI2] in {
}
//===----------------------------------------------------------------------===//
+// TBM Instructions
+//
+let Predicates = [HasTBM], Defs = [EFLAGS] in {
+
+multiclass tbm_ternary_imm_intr<bits<8> opc, RegisterClass RC, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int, Operand immtype,
+ SDPatternOperator immoperator> {
+ def ri : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
+ !strconcat(OpcodeStr,
+ "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
+ [(set RC:$dst, (Int RC:$src1, immoperator:$cntl))]>,
+ XOP, XOPA, VEX;
+ def mi : Ii32<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, immtype:$cntl),
+ !strconcat(OpcodeStr,
+ "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
+ [(set RC:$dst, (Int (ld_frag addr:$src1), immoperator:$cntl))]>,
+ XOP, XOPA, VEX;
+}
+
+defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr", i32mem, loadi32,
+ int_x86_tbm_bextri_u32, i32imm, imm>;
+defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr", i64mem, loadi64,
+ int_x86_tbm_bextri_u64, i64i32imm,
+ i64immSExt32>, VEX_W;
+
+multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
+ RegisterClass RC, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag> {
+let hasSideEffects = 0 in {
+ def rr : I<opc, FormReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+ []>, XOP, XOP9, VEX_4V;
+ let mayLoad = 1 in
+ def rm : I<opc, FormMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+ []>, XOP, XOP9, VEX_4V;
+}
+}
+
+multiclass tbm_binary_intr<bits<8> opc, string OpcodeStr,
+ Format FormReg, Format FormMem> {
+ defm NAME#32 : tbm_binary_rm<opc, FormReg, FormMem, GR32, OpcodeStr, i32mem,
+ loadi32>;
+ defm NAME#64 : tbm_binary_rm<opc, FormReg, FormMem, GR64, OpcodeStr, i64mem,
+ loadi64>, VEX_W;
+}
+
+defm BLCFILL : tbm_binary_intr<0x01, "blcfill", MRM1r, MRM1m>;
+defm BLCI : tbm_binary_intr<0x02, "blci", MRM6r, MRM6m>;
+defm BLCIC : tbm_binary_intr<0x01, "blcic", MRM5r, MRM5m>;
+defm BLCMSK : tbm_binary_intr<0x02, "blcmsk", MRM1r, MRM1m>;
+defm BLCS : tbm_binary_intr<0x01, "blcs", MRM3r, MRM3m>;
+defm BLSFILL : tbm_binary_intr<0x01, "blsfill", MRM2r, MRM2m>;
+defm BLSIC : tbm_binary_intr<0x01, "blsic", MRM6r, MRM6m>;
+defm T1MSKC : tbm_binary_intr<0x01, "t1mskc", MRM7r, MRM7m>;
+defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>;
+} // HasTBM, EFLAGS
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments to auto generate TBM instructions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasTBM] in {
+ def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
+ (BEXTRI32ri GR32:$src1, imm:$src2)>;
+ def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
+ (BEXTRI32mi addr:$src1, imm:$src2)>;
+ def : Pat<(X86bextr GR64:$src1, i64immSExt32:$src2),
+ (BEXTRI64ri GR64:$src1, i64immSExt32:$src2)>;
+ def : Pat<(X86bextr (loadi64 addr:$src1), i64immSExt32:$src2),
+ (BEXTRI64mi addr:$src1, i64immSExt32:$src2)>;
+
+ // FIXME: patterns for the load versions are not implemented
+ def : Pat<(and GR32:$src, (add GR32:$src, 1)),
+ (BLCFILL32rr GR32:$src)>;
+ def : Pat<(and GR64:$src, (add GR64:$src, 1)),
+ (BLCFILL64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (not (add GR32:$src, 1))),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (not (add GR64:$src, 1))),
+ (BLCI64rr GR64:$src)>;
+
+ // Extra patterns because opt can optimize the above patterns to this.
+ def : Pat<(or GR32:$src, (sub -2, GR32:$src)),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (sub -2, GR64:$src)),
+ (BLCI64rr GR64:$src)>;
+
+ def : Pat<(and (not GR32:$src), (add GR32:$src, 1)),
+ (BLCIC32rr GR32:$src)>;
+ def : Pat<(and (not GR64:$src), (add GR64:$src, 1)),
+ (BLCIC64rr GR64:$src)>;
+
+ def : Pat<(xor GR32:$src, (add GR32:$src, 1)),
+ (BLCMSK32rr GR32:$src)>;
+ def : Pat<(xor GR64:$src, (add GR64:$src, 1)),
+ (BLCMSK64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (add GR32:$src, 1)),
+ (BLCS32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (add GR64:$src, 1)),
+ (BLCS64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (add GR32:$src, -1)),
+ (BLSFILL32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (add GR64:$src, -1)),
+ (BLSFILL64rr GR64:$src)>;
+
+ def : Pat<(or (not GR32:$src), (add GR32:$src, -1)),
+ (BLSIC32rr GR32:$src)>;
+ def : Pat<(or (not GR64:$src), (add GR64:$src, -1)),
+ (BLSIC64rr GR64:$src)>;
+
+ def : Pat<(or (not GR32:$src), (add GR32:$src, 1)),
+ (T1MSKC32rr GR32:$src)>;
+ def : Pat<(or (not GR64:$src), (add GR64:$src, 1)),
+ (T1MSKC64rr GR64:$src)>;
+
+ def : Pat<(and (not GR32:$src), (add GR32:$src, -1)),
+ (TZMSK32rr GR32:$src)>;
+ def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
+ (TZMSK64rr GR64:$src)>;
+} // HasTBM
+
+//===----------------------------------------------------------------------===//
// Subsystems.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index cb12956..ba58143 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -204,7 +204,7 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
//===----------------------------------------------------------------------===//
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
- [(int_x86_mmx_emms)]>;
+ [(int_x86_mmx_emms)], IIC_MMX_EMMS>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
@@ -236,10 +236,10 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
(MMX_X86movd2w (x86mmx VR64:$src)))],
IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
-let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
- [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
+ [(set VR64:$dst, (bitconvert GR64:$src))],
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
@@ -250,10 +250,6 @@ def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
"movd\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>;
-def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (bitconvert GR64:$src))], IIC_MMX_MOV_MM_RM>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
@@ -289,7 +285,7 @@ def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(i64 (bitconvert (x86mmx VR64:$src))))))],
IIC_MMX_MOVQ_RR>;
-let neverHasSideEffects = 1 in
+let isCodeGenOnly = 1, hasSideEffects = 1 in {
def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[], IIC_MMX_MOVQ_RR>;
@@ -297,6 +293,7 @@ def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[], IIC_MMX_MOVQ_RR>;
+}
} // SchedRW
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
@@ -304,21 +301,15 @@ def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>;
-let AddedComplexity = 15 in
-// movd to MMX register zero-extends
-def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))],
- IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
-let AddedComplexity = 20 in
-def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
- (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (x86mmx (X86vzmovl (x86mmx
- (scalar_to_vector (loadi32 addr:$src))))))],
- IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
+let Predicates = [HasMMX] in {
+ let AddedComplexity = 15 in
+ // movd to MMX register zero-extends
+ def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
+ (MMX_MOVD64rr GR32:$src)>;
+ let AddedComplexity = 20 in
+ def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
+ (MMX_MOVD64rm addr:$src)>;
+}
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
@@ -555,18 +546,18 @@ let Constraints = "$src1 = $dst" in {
// Extract / Insert
def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
- "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
- (iPTR imm:$src2)))],
- IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
+ (outs GR32orGR64:$dst), (ins VR64:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
+ (iPTR imm:$src2)))],
+ IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
- (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
+ (ins VR64:$src1, GR32orGR64:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
- GR32:$src2, (iPTR imm:$src3)))],
+ GR32orGR64:$src2, (iPTR imm:$src3)))],
IIC_MMX_PINSRW>, Sched<[WriteShuffle]>;
def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
@@ -580,9 +571,10 @@ let Constraints = "$src1 = $dst" in {
}
// Mask creation
-def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
+ (ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
+ [(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))]>;
@@ -599,10 +591,10 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
// Misc.
let SchedRW = [WriteShuffle] in {
let Uses = [EDI] in
-def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
- "maskmovq\t{$mask, $src|$src, $mask}",
- [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
- IIC_MMX_MASKMOV>;
+def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
+ IIC_MMX_MASKMOV>;
let Uses = [RDI] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a86006a..a5debc0 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -151,6 +151,34 @@ def SSE_MOVU_ITINS : OpndItins<
IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;
+def SSE_DPPD_ITINS : OpndItins<
+ IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM
+>;
+
+def SSE_DPPS_ITINS : OpndItins<
+  IIC_SSE_DPPS_RR, IIC_SSE_DPPS_RM
+>;
+
+def DEFAULT_ITINS : OpndItins<
+ IIC_ALU_NONMEM, IIC_ALU_MEM
+>;
+
+def SSE_EXTRACT_ITINS : OpndItins<
+ IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM
+>;
+
+def SSE_INSERT_ITINS : OpndItins<
+ IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM
+>;
+
+def SSE_MPSADBW_ITINS : OpndItins<
+ IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
+>;
+
+def SSE_PMULLD_ITINS : OpndItins<
+ IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
+>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
@@ -455,10 +483,10 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
-// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
-// is used instead. Register-to-register movss/movsd is not modeled as an
-// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
-// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
+// register copies because it's a partial register update; register-to-register
+// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
+// that the insert be implementable in terms of a copy and, as just mentioned, we
+// don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
@@ -526,7 +554,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
}
// Patterns
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVS{S,D} to the lower bits.
@@ -1074,23 +1102,6 @@ let Predicates = [UseSSE1] in {
(MOVUPSmr addr:$dst, VR128:$src)>;
}
-// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
-// bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
-def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
-def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
-}
-
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
@@ -1103,15 +1114,15 @@ let isCodeGenOnly = 1 in {
"movapd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (alignedloadfsf64 addr:$src))],
IIC_SSE_MOVA_P_RM>, VEX;
+ def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
+ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
}
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
- IIC_SSE_MOVA_P_RM>;
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
- IIC_SSE_MOVA_P_RM>;
}
//===----------------------------------------------------------------------===//
@@ -1327,7 +1338,7 @@ let Predicates = [UseSSE2] in {
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, Predicates = [UseAVX] in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1358,7 +1369,7 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
// MOVLHPS patterns
def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(VMOVLHPSrr VR128:$src1, VR128:$src2)>;
@@ -1440,7 +1451,7 @@ let neverHasSideEffects = 1 in {
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[WriteCvtI2F]>;
@@ -1452,6 +1463,7 @@ let neverHasSideEffects = 1 in {
} // neverHasSideEffects = 1
}
+let Predicates = [UseAVX] in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>,
@@ -1485,7 +1497,7 @@ def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
-
+}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
@@ -1499,12 +1511,12 @@ defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
-def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+let Predicates = [UseAVX] in {
+ def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
-def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
-let Predicates = [HasAVX] in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -1606,19 +1618,21 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
-
+}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
+let Predicates = [UseAVX] in {
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
@@ -1633,7 +1647,7 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
SSE_CVT_Scalar, 0>, XD,
VEX_4V, VEX_W;
-
+}
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
@@ -1652,6 +1666,7 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
+let Predicates = [UseAVX] in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS, VEX;
@@ -1666,6 +1681,7 @@ defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
"cvttsd2si", SSE_CVT_SD2SI>,
XD, VEX, VEX_W;
+}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
@@ -1679,13 +1695,14 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
"cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
+let Predicates = [UseAVX] in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
-
+}
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS;
@@ -1707,6 +1724,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
SSEPackedSingle, SSE_CVT_PS>,
TB, Requires<[UseSSE2]>;
+let Predicates = [UseAVX] in {
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1723,6 +1741,7 @@ def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+}
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
(CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
@@ -1744,7 +1763,7 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
/// SSE 2 Only
// Convert scalar double to scalar single
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
@@ -1760,7 +1779,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
- Requires<[HasAVX]>;
+ Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
@@ -1778,27 +1797,27 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
@@ -1807,7 +1826,7 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1824,16 +1843,16 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
}
def : Pat<(f64 (fextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
+ Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
- Requires<[HasAVX, OptForSpeed]>;
+ Requires<[UseAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1861,14 +1880,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
@@ -1895,7 +1914,7 @@ def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
+ (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
@@ -1905,7 +1924,7 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
+ (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
@@ -1934,7 +1953,7 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+ (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>, VEX,
Sched<[WriteCvtF2ILd]>;
// YMM only
@@ -1946,7 +1965,7 @@ def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
+ (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -1972,7 +1991,7 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memopv4f32 addr:$src)))],
+ (loadv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
@@ -1982,7 +2001,7 @@ def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
- (memopv8f32 addr:$src)))],
+ (loadv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
Sched<[WriteCvtF2ILd]>;
@@ -1999,27 +2018,27 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PSrm addr:$src)>;
def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
(VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
(VCVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
(VCVTTPS2DQrm addr:$src)>;
def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
(VCVTDQ2PSYrr VR256:$src)>;
- def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))),
+ def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
(VCVTDQ2PSYrm addr:$src)>;
def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
(VCVTTPS2DQYrr VR256:$src)>;
- def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))),
+ def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
(VCVTTPS2DQYrm addr:$src)>;
}
@@ -2056,7 +2075,7 @@ def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memopv2f64 addr:$src)))],
+ (loadv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
// YMM only
@@ -2068,7 +2087,7 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
+ (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -2076,7 +2095,7 @@ def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;
- def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
(VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]
@@ -2110,7 +2129,7 @@ def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
+ (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
@@ -2140,7 +2159,7 @@ def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtdq2_pd_256
- (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+ (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -2162,7 +2181,7 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
- def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]
@@ -2181,7 +2200,7 @@ def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
+ (int_x86_sse2_cvtpd2ps (loadv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
// YMM only
@@ -2193,7 +2212,7 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
+ (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
@@ -2215,13 +2234,13 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
let Predicates = [HasAVX] in {
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
(VCVTDQ2PSYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(VCVTPD2PSrr VR128:$src)>;
- def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
(VCVTPD2PSXrm addr:$src)>;
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
@@ -2299,7 +2318,7 @@ let Constraints = "$src1 = $dst" in {
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSE_ALU_F32S>, // same latency as 32 bit compare
+ SSE_ALU_F64S>,
XD;
}
@@ -2334,7 +2353,7 @@ let Constraints = "$src1 = $dst" in {
SSE_ALU_F32S>, XS;
defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SSE_ALU_F32S>, // same latency as f32
+ SSE_ALU_F64S>,
XD;
}
@@ -2403,26 +2422,27 @@ let Defs = [EFLAGS] in {
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Operand CC, Intrinsic Int, string asm,
- string asm_alt, Domain d> {
+ string asm_alt, Domain d,
+ OpndItins itins = SSE_ALU_F32P> {
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>,
+ itins.rr, d>,
Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>,
+ itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
+ asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ asm_alt, [], itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
}
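// The OpndItins parameter (default SSE_ALU_F32P) lets each instantiation pick
// single- vs double-precision compare itineraries for the rr/rm forms.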
@@ -2447,11 +2467,11 @@ let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedSingle>, TB;
+ SSEPackedSingle, SSE_ALU_F32P>, TB;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedDouble>, TB, OpSize;
+ SSEPackedDouble, SSE_ALU_F64P>, TB, OpSize;
}
let Predicates = [HasAVX] in {
@@ -2511,16 +2531,16 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+ loadv4f32, SSEPackedSingle>, TB, VEX_4V;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
+ loadv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ loadv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ loadv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2535,13 +2555,13 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86Shufp VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v2i64 (X86Shufp VR128:$src1,
- (memopv2i64 addr:$src2), (i8 imm:$imm))),
+ (loadv2i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
@@ -2550,13 +2570,13 @@ let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86Shufp VR256:$src1,
- (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86Shufp VR256:$src1,
- (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (loadv4i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
}
@@ -2600,29 +2620,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, TB, VEX_4V;
-defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, TB, VEX_4V;
-defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
+defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, TB, VEX_4V, VEX_L;
-defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
+defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
-defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
+defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, TB, VEX_4V, VEX_L;
-defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
+defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
@@ -2642,20 +2662,20 @@ let Constraints = "$src1 = $dst" in {
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX1Only] in {
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
@@ -2686,13 +2706,10 @@ let Predicates = [UseSSE2] in {
/// sse12_extr_sign_mask - sse 1 & 2 packed FP sign mask extraction
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
- def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
- Sched<[WriteVecLogic]>;
- def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
- IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
+ def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
}
let Predicates = [HasAVX] in {
@@ -2709,29 +2726,15 @@ let Predicates = [HasAVX] in {
OpSize, VEX, VEX_L;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ (SUBREG_TO_REG (i64 0),
+ (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>;
-
- // Assembler Only
- def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
- def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedDouble>, TB,
- OpSize, VEX, Sched<[WriteVecLogic]>;
- def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
- def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedDouble>, TB,
- OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
+ (SUBREG_TO_REG (i64 0),
+ (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2740,16 +2743,18 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>,
Requires<[UseSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ (SUBREG_TO_REG (i64 0),
+ (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>,
Requires<[UseSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>,
Requires<[UseSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ (SUBREG_TO_REG (i64 0),
+ (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>,
Requires<[UseSSE2]>;
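// The i64 X86fgetsign patterns wrap the 32-bit MOVMSK result in SUBREG_TO_REG
// because MOVMSKPS/MOVMSKPD write a 32-bit GPR, and a 32-bit write implicitly
// zeroes the upper half of the 64-bit register.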
//===---------------------------------------------------------------------===//
@@ -2788,7 +2793,7 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
OpndItins itins, bit IsCommutable = 0> {
let Predicates = [HasAVX] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
- VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
+ VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
@@ -2796,7 +2801,7 @@ let Constraints = "$src1 = $dst" in
let Predicates = [HasAVX2] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
- OpVT256, VR256, memopv4i64, i256mem, itins,
+ OpVT256, VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
}
@@ -2836,16 +2841,18 @@ multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
}
// Alias bitwise logical operations using SSE logical ops on packed FP values.
-defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
-defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
-defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-
-let isCommutable = 0 in
- defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
+let isCodeGenOnly = 1 in {
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
+ SSE_BIT_ITINS_P>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
SSE_BIT_ITINS_P>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
+ SSE_BIT_ITINS_P>;
+
+ let isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
+ SSE_BIT_ITINS_P>;
+}
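// The FsAND/FsOR/FsXOR/FsANDN forms apply the packed logic opcodes to FR32/FR64
// scalars; isCodeGenOnly keeps them out of the assembler and disassembler
// tables so they do not collide with the real ANDPS/ORPS/XORPS/ANDNPS entries.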
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
@@ -2855,14 +2862,14 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "ps"), f256mem,
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
+ (loadv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
(bc_v4i64 (v4f64 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>,
+ (loadv4i64 addr:$src2)))], 0>,
TB, OpSize, VEX_4V, VEX_L;
// In AVX no need to add a pattern for 128-bit logical rr ps, because they
@@ -2872,14 +2879,14 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+ (loadv2i64 addr:$src2)))], 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(bc_v2i64 (v2f64 VR128:$src2))))],
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>,
+ (loadv2i64 addr:$src2)))], 0>,
TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
@@ -2921,120 +2928,93 @@ let isCommutable = 0 in
/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below
-multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins,
- bit Is2Addr = 1> {
- defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem,
- itins.s, Is2Addr>, XS;
- defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem,
- itins.d, Is2Addr>, XD;
-}
-
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, SizeItins itins> {
-let Predicates = [HasAVX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- VR128, v4f32, f128mem, memopv4f32,
+ VR128, v4f32, f128mem, loadv4f32,
SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- VR128, v2f64, f128mem, memopv2f64,
+ VR128, v2f64, f128mem, loadv2f64,
SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, VR256, v8f32, f256mem, memopv8f32,
+ OpNode, VR256, v8f32, f256mem, loadv8f32,
SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, VR256, v4f64, f256mem, memopv4f64,
+ OpNode, VR256, v4f64, f256mem, loadv4f64,
SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle,
- itins.s, 1>, TB;
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble,
- itins.d, 1>, TB, OpSize;
-}
-}
-multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
- SizeItins itins,
- bit Is2Addr = 1> {
- defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
- itins.s, Is2Addr>, XS;
- defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
- itins.d, Is2Addr>, XD;
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle,
+ itins.s>, TB;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble,
+ itins.d>, TB, OpSize;
+ }
}
-// Binary Arithmetic instructions
-defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
-defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
-let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
- defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
- defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
- defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
-}
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins> {
+ defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG;
+ defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG;
-let isCodeGenOnly = 1 in {
- defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
- defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ let Constraints = "$src1 = $dst" in {
+ defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, itins.s>, XS;
+ defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, itins.d>, XD;
+ }
}
-defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins> {
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ itins.s, 0>, XS, VEX_4V, VEX_LIG;
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ itins.d, 0>, XD, VEX_4V, VEX_LIG;
-let isCommutable = 0 in {
- defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
+ let Constraints = "$src1 = $dst" in {
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ itins.s>, XS;
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ itins.d>, XD;
+ }
}
-let Constraints = "$src1 = $dst" in {
- defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
- defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
-
- let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
- defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
- defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
- defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
- }
+// Binary Arithmetic instructions
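// Each defm below expands to both the VEX-encoded three-operand AVX forms
// (via the V#NAME prefix) and the legacy two-operand SSE forms with the
// tied $src1 = $dst constraint.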
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
+ basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
+let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}
let isCodeGenOnly = 1 in {
- defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- let Constraints = "$src1 = $dst" in {
- defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
- defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
- }
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}
/// Unop Arithmetic
@@ -3180,7 +3160,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
+ [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))],
itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
@@ -3190,7 +3170,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))],
itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
@@ -3217,7 +3197,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ [(set VR128:$dst, (V4F32Int (loadv4f32 addr:$src)))],
itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
@@ -3228,7 +3208,7 @@ let Predicates = [HasAVX] in {
(ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
+ [(set VR256:$dst, (V8F32Int (loadv8f32 addr:$src)))],
itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
@@ -3298,7 +3278,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
+ [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))],
itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
@@ -3308,7 +3288,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))],
itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
@@ -3341,30 +3321,31 @@ defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
int_x86_avx_rcp_ps_256, SSE_RCPP>;
-def : Pat<(f32 (fsqrt FR32:$src)),
- (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (fsqrt (load addr:$src))),
- (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-def : Pat<(f64 (fsqrt FR64:$src)),
- (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
-def : Pat<(f64 (fsqrt (load addr:$src))),
- (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
-def : Pat<(f32 (X86frsqrt FR32:$src)),
- (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (X86frsqrt (load addr:$src))),
- (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
-def : Pat<(f32 (X86frcp FR32:$src)),
- (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (X86frcp (load addr:$src))),
- (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
+ def : Pat<(f32 (fsqrt FR32:$src)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+ def : Pat<(f64 (fsqrt FR64:$src)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+ def : Pat<(f32 (X86frsqrt FR32:$src)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+ def : Pat<(f32 (X86frcp FR32:$src)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+}
+let Predicates = [UseAVX] in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
(COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128:$src, FR32)),
@@ -3378,7 +3359,9 @@ let Predicates = [HasAVX] in {
VR128)>;
def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
(VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
+}
+let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
(COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128:$src, FR32)),
@@ -3662,7 +3645,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3725,7 +3708,7 @@ multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
bit IsCommutable = 0> {
let Predicates = [HasAVX] in
defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
- VR128, memopv2i64, i128mem, itins,
+ VR128, loadv2i64, i128mem, itins,
IsCommutable, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
@@ -3734,7 +3717,7 @@ let Constraints = "$src1 = $dst" in
let Predicates = [HasAVX2] in
defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
- VR256, memopv4i64, i256mem, itins,
+ VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
}
@@ -3761,11 +3744,11 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
(bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
- (ins RC:$src1, i32i8imm:$src2),
+ (ins RC:$src1, i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))], itins.ri>,
Sched<[WriteVecShift]>;
}
@@ -3849,15 +3832,15 @@ defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>;
+ int_x86_avx2_psad_bw, SSE_PMADD, 1>;
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
- memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
VEX_4V;
let Predicates = [HasAVX2] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
- VR256, memopv4i64, i256mem,
+ VR256, loadv4i64, i256mem,
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
@@ -3993,12 +3976,14 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
+ IIC_SSE_INTSHDQ_P_RI>;
def PSRLDQri : PDIi8<0x73, MRM3r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
+ IIC_SSE_INTSHDQ_P_RI>;
// PSRADQri doesn't exist in SSE[1-3].
}
} // Constraints = "$src1 = $dst"
@@ -4082,14 +4067,14 @@ let Predicates = [HasAVX] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
Sched<[WriteShuffleLd]>;
}
@@ -4100,14 +4085,14 @@ let Predicates = [HasAVX2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L,
Sched<[WriteShuffleLd]>;
}
@@ -4118,14 +4103,14 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>,
Sched<[WriteShuffleLd]>;
}
}
@@ -4136,7 +4121,7 @@ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))),
(VPSHUFDmi addr:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
(VPSHUFDri VR128:$src1, imm:$imm)>;
@@ -4259,13 +4244,13 @@ let ExeDomain = SSEPackedInt in {
multiclass sse2_pinsrw<bit Is2Addr = 1> {
def rri : Ii8<0xC4, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1,
- GR32:$src2, i32i8imm:$src3),
+ GR32orGR64:$src2, i32i8imm:$src3),
!if(Is2Addr,
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
- Sched<[WriteShuffle]>;
+ (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))],
+ IIC_SSE_PINSRW>, Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4281,29 +4266,24 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
// Extract
let Predicates = [HasAVX] in
def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, TB, OpSize, VEX,
+ [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, TB, OpSize, VEX,
Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))], IIC_SSE_PEXTRW>,
+ [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))], IIC_SSE_PEXTRW>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
// Insert
-let Predicates = [HasAVX] in {
- defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
- def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
- "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
-}
+let Predicates = [HasAVX] in
+defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>;
+let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
+defm PINSRW : sse2_pinsrw, TB, OpSize;
} // ExeDomain = SSEPackedInt
@@ -4313,24 +4293,23 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
-def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
+ (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
IIC_SSE_MOVMSK>, VEX;
-def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX;
let Predicates = [HasAVX2] in {
-def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
+ (ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX, VEX_L;
-def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+ [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>,
+ VEX, VEX_L;
}
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
IIC_SSE_MOVMSK>;
} // ExeDomain = SSEPackedInt
@@ -4341,25 +4320,25 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
-let Uses = [EDI] in
+let Uses = [EDI], Predicates = [HasAVX,In32BitMode] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
IIC_SSE_MASKMOV>, VEX;
-let Uses = [RDI] in
+let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
IIC_SSE_MASKMOV>, VEX;
-let Uses = [EDI] in
+let Uses = [EDI], Predicates = [UseSSE2,In32BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
IIC_SSE_MASKMOV>;
-let Uses = [RDI] in
+let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
@@ -4386,12 +4365,13 @@ def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
IIC_SSE_MOVDQ>,
VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
@@ -4410,6 +4390,7 @@ def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
@@ -4418,25 +4399,27 @@ def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
-def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
-
-def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>,
- VEX, Sched<[WriteLoad]>;
-def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
-
-def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+let isCodeGenOnly = 1 in {
+ def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+
+ def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>,
+ VEX, Sched<[WriteLoad]>;
+ def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+
+ def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+}
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4463,12 +4446,24 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
+
+def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
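// These X86Vinsert patterns select an insertion of a GPR into element 0 of a
// zero or undef 256-bit vector to a plain VMOVD/VMOVQ into an XMM register;
// SUBREG_TO_REG places the result in the low lane, and the VEX-encoded move
// already zeroes the upper bits.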
+
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>,
@@ -4484,121 +4479,112 @@ def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
-let Predicates = [HasAVX] in
-def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX, Sched<[WriteLoad]>;
-def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+let isCodeGenOnly = 1 in {
+ let Predicates = [UseAVX] in
+ def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ VEX, Sched<[WriteLoad]>;
+ def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+ def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
+
+ def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+ def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
-def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
+ def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
-
-def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
- IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
-def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
-def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+}
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
-def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
-def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
-def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
-def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+let isCodeGenOnly = 1 in {
+ def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
+ def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
+ def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
+ def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+}
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
-let SchedRW = [WriteMove] in {
+let isCodeGenOnly = 1, SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
-def VMOVZDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))],
- IIC_SSE_MOVDQ>, VEX;
def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ "movq\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>,
VEX, VEX_W;
-}
-let AddedComplexity = 15 in {
-def MOVZDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))],
- IIC_SSE_MOVDQ>;
def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>;
}
-} // SchedRW
+} // isCodeGenOnly, SchedRW
-let AddedComplexity = 20, SchedRW = [WriteLoad] in {
-def VMOVZDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))],
- IIC_SSE_MOVDQ>, VEX;
-def MOVZDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))],
- IIC_SSE_MOVDQ>;
-} // AddedComplexity, SchedRW
+let Predicates = [UseAVX] in {
+ let AddedComplexity = 15 in
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (VMOVDI2PDIrr GR32:$src)>;
-let Predicates = [HasAVX] in {
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
- (VMOVZDI2PDIrm addr:$src)>;
+ (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (VMOVZDI2PDIrm addr:$src)>;
+ (VMOVDI2PDIrm addr:$src)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
-let Predicates = [UseSSE2], AddedComplexity = 20 in {
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
+let Predicates = [UseSSE2] in {
+ let AddedComplexity = 15 in
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (MOVDI2PDIrr GR32:$src)>;
+
+ let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (MOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVDI2PDIrm addr:$src)>;
+ }
}
// These are the correct encodings of the instructions so that we know how to
@@ -4607,15 +4593,12 @@ let Predicates = [UseSSE2], AddedComplexity = 20 in {
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
(MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
- (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
(MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
- (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
- (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
- (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
+def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
+ (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
+ (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
@@ -4630,7 +4613,7 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- VEX, Requires<[HasAVX]>;
+ VEX, Requires<[UseAVX]>;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4667,16 +4650,15 @@ def MOVLQ128mr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
-let AddedComplexity = 20 in
+let isCodeGenOnly = 1, AddedComplexity = 20 in {
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
+ XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>;
-let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4684,10 +4666,9 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
+}
-let Predicates = [HasAVX], AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZQI2PQIrm addr:$src)>;
+let Predicates = [UseAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(VMOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
@@ -4695,8 +4676,6 @@ let Predicates = [HasAVX], AddedComplexity = 20 in {
}
let Predicates = [UseSSE2], AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (MOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(MOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
@@ -4719,7 +4698,7 @@ def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[UseAVX]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -4728,14 +4707,14 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
XS, Requires<[UseSSE2]>;
} // SchedRW
-let SchedRW = [WriteVecLogicLd] in {
+let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
(loadv2i64 addr:$src))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[UseAVX]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -4744,49 +4723,19 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
-} // SchedRW
+} // isCodeGenOnly, SchedRW
let AddedComplexity = 20 in {
- let Predicates = [HasAVX] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZPQILo2PQIrm addr:$src)>;
+ let Predicates = [UseAVX] in {
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(VMOVZPQILo2PQIrr VR128:$src)>;
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (MOVZPQILo2PQIrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>;
}
}
-// Instructions to match in the assembler
-let SchedRW = [WriteMove] in {
-def VMOVQs64rr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVDQ>, VEX, VEX_W;
-def VMOVQd64rr : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVDQ>, VEX, VEX_W;
-// Recognize "movd" with GR64 destination, but encode as a "movq"
-def VMOVQd64rr_alt : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVDQ>, VEX, VEX_W;
-} // SchedRW
-
-// Instructions for the disassembler
-// xr = XMM register
-// xm = mem64
-
-let SchedRW = [WriteMove] in {
-let Predicates = [HasAVX] in
-def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
-def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
-} // SchedRW
-
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
@@ -4805,13 +4754,13 @@ def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
let Predicates = [HasAVX] in {
defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v4f32, VR128, memopv4f32, f128mem>, VEX;
+ v4f32, VR128, loadv4f32, f128mem>, VEX;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v4f32, VR128, memopv4f32, f128mem>, VEX;
+ v4f32, VR128, loadv4f32, f128mem>, VEX;
defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
+ v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L;
defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
+ v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
memopv4f32, f128mem>;
@@ -4821,19 +4770,19 @@ defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVSHDUPrm addr:$src)>;
def : Pat<(v4i32 (X86Movsldup VR128:$src)),
(VMOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVSLDUPrm addr:$src)>;
def : Pat<(v8i32 (X86Movshdup VR256:$src)),
(VMOVSHDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
(VMOVSHDUPYrm addr:$src)>;
def : Pat<(v8i32 (X86Movsldup VR256:$src)),
(VMOVSLDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
(VMOVSLDUPYrm addr:$src)>;
}
@@ -4887,20 +4836,20 @@ let Predicates = [HasAVX] in {
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
let Predicates = [HasAVX] in {
- def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
// 256-bit version
- def : Pat<(X86Movddup (memopv4f64 addr:$src)),
+ def : Pat<(X86Movddup (loadv4f64 addr:$src)),
(VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (memopv4i64 addr:$src)),
+ def : Pat<(X86Movddup (loadv4i64 addr:$src)),
(VMOVDDUPYrm addr:$src)>;
def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
(VMOVDDUPYrm addr:$src)>;
@@ -5102,12 +5051,12 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
VR128:$src))>;
-def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>;
-def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>;
+def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
+def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
VR256:$src))>;
-def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>;
-def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>;
+def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
+def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
let Predicates = [HasAVX] in {
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
@@ -5263,34 +5212,34 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
+ (bitconvert (loadv4i64 addr:$src2))))]>, OpSize;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PHADDSUBW, 0>, VEX_4V;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PHADDSUBW, 0>, VEX_4V;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PSIGN, 0>, VEX_4V;
defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PSIGN, 0>, VEX_4V;
defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PSIGN, 0>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PSHUFB, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
@@ -5310,28 +5259,28 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V, VEX_L;
@@ -5389,7 +5338,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
+ [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
@@ -5397,7 +5346,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
@@ -5489,16 +5438,17 @@ def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
-multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+ [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
- OpSize;
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],
+ itins.rm>, OpSize;
}
multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
@@ -5509,22 +5459,23 @@ multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
+ [(set VR256:$dst, (IntId (load addr:$src)))]>,
+ OpSize;
}
let Predicates = [HasAVX] in {
-defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
- VEX;
-defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
- VEX;
-defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
- VEX;
-defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
- VEX;
-defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
- VEX;
-defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
- VEX;
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw",
+ int_x86_sse41_pmovsxbw>, VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd",
+ int_x86_sse41_pmovsxwd>, VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq",
+ int_x86_sse41_pmovsxdq>, VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw",
+ int_x86_sse41_pmovzxbw>, VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd",
+ int_x86_sse41_pmovzxwd>, VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq",
+ int_x86_sse41_pmovzxdq>, VEX;
}
let Predicates = [HasAVX2] in {
@@ -5542,12 +5493,12 @@ defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
int_x86_avx2_pmovzxdq>, VEX, VEX_L;
}
-defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
-defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
-defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
-defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
-defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
-defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw, SSE_INTALU_ITINS_P>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd, SSE_INTALU_ITINS_P>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq, SSE_INTALU_ITINS_P>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw, SSE_INTALU_ITINS_P>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd, SSE_INTALU_ITINS_P>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq, SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX] in {
// Common patterns involving scalar load.
@@ -5645,32 +5596,39 @@ let Predicates = [HasAVX2] in {
(VPMOVZXDQYrr VR128:$src)>;
def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))),
(VPMOVZXWDYrr VR128:$src)>;
+ def : Pat<(v16i16 (X86vzmovly (v16i8 VR128:$src))),
+ (VPMOVZXBWYrr VR128:$src)>;
}
def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+ def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
}
let Predicates = [HasAVX] in {
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
}
let Predicates = [UseSSE41] in {
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
}
-multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+ [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))],
+ itins.rm>,
OpSize;
}
@@ -5709,10 +5667,14 @@ defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
int_x86_avx2_pmovzxwq>, VEX, VEX_L;
}
-defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
-defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
-defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
-defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd,
+ SSE_INTALU_ITINS_P>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq,
+ SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX] in {
// Common patterns involving scalar load
@@ -5740,7 +5702,8 @@ let Predicates = [UseSSE41] in {
(PMOVZXWQrm addr:$src)>;
}
-multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
@@ -5779,8 +5742,10 @@ defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
int_x86_avx2_pmovzxbq>, VEX, VEX_L;
}
-defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq,
+ SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX2] in {
def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
@@ -6032,35 +5997,39 @@ let Predicates = [UseSSE41] in {
/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
+ imm:$src2))]>,
OpSize;
let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
- def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
-}
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ let isCodeGenOnly = 1, hasSideEffects = 0 in
+ def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
@@ -6122,31 +6091,28 @@ defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
-multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
+ OpndItins itins = DEFAULT_ITINS> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32:$dst,
- (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ [(set GR32orGR64:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))],
+ itins.rr>,
OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
- addr:$dst)]>, OpSize;
+ addr:$dst)], itins.rm>, OpSize;
}
let ExeDomain = SSEPackedSingle in {
- let Predicates = [UseAVX] in {
+ let Predicates = [UseAVX] in
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
- def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, OpSize, VEX;
- }
- defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+ defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>;
}
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
@@ -6167,13 +6133,13 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, GR32orGR64:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6246,7 +6212,8 @@ let Constraints = "$src1 = $dst" in
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
-multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
!if(Is2Addr,
@@ -6254,7 +6221,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
@@ -6265,14 +6232,14 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
[(set VR128:$dst,
(X86insrtps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- imm:$src3))]>, OpSize;
+ imm:$src3))], itins.rm>, OpSize;
}
let ExeDomain = SSEPackedSingle in {
- let Predicates = [HasAVX] in
+ let Predicates = [UseAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}
//===----------------------------------------------------------------------===//
@@ -6290,7 +6257,8 @@ let ExeDomain = SSEPackedSingle in {
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
// Vector intrinsic operation, mem
@@ -6299,7 +6267,8 @@ let ExeDomain = SSEPackedSingle in {
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))],
+ IIC_SSE_ROUNDPS_MEM>,
OpSize;
} // ExeDomain = SSEPackedSingle
@@ -6309,7 +6278,8 @@ let ExeDomain = SSEPackedDouble in {
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
// Vector intrinsic operation, mem
@@ -6318,7 +6288,8 @@ let ExeDomain = SSEPackedDouble in {
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
} // ExeDomain = SSEPackedDouble
}
@@ -6402,11 +6373,11 @@ let ExeDomain = GenericDomain in {
let Predicates = [HasAVX] in {
// Intrinsic form
defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
- memopv4f32, memopv2f64,
+ loadv4f32, loadv2f64,
int_x86_sse41_round_ps,
int_x86_sse41_round_pd>, VEX;
defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
- memopv8f32, memopv4f64,
+ loadv8f32, loadv4f64,
int_x86_avx_round_ps_256,
int_x86_avx_round_pd_256>, VEX, VEX_L;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
@@ -6544,7 +6515,7 @@ def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
OpSize, VEX;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
+ [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
OpSize, VEX;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -6553,7 +6524,7 @@ def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
OpSize, VEX, VEX_L;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
OpSize, VEX, VEX_L;
}
@@ -6582,13 +6553,13 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
-defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
-defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>,
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
VEX_L;
}
let ExeDomain = SSEPackedDouble in {
-defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
-defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>,
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
VEX_L;
}
}
@@ -6600,30 +6571,33 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>,
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
+ [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
OpSize, XS;
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (ctpop (loadi16 addr:$src))),
- (implicit EFLAGS)]>, OpSize, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, OpSize, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
+ [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
XS;
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (ctpop (loadi32 addr:$src))),
- (implicit EFLAGS)]>, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
+ [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
XS;
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (ctpop (loadi64 addr:$src))),
- (implicit EFLAGS)]>, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
}
@@ -6651,14 +6625,16 @@ defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
+ Intrinsic IntId128, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
+ itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -6666,7 +6642,8 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))],
+ itins.rm>, OpSize;
}
/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
@@ -6682,14 +6659,15 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
+ (bitconvert (loadv4i64 addr:$src2))))]>, OpSize;
}
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
@@ -6712,21 +6690,21 @@ let Predicates = [HasAVX] in {
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
}
@@ -6736,21 +6714,21 @@ let Predicates = [HasAVX2] in {
defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
int_x86_avx2_packusdw>, VEX_4V, VEX_L;
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
}
@@ -6759,22 +6737,23 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
- memopv2i64, i128mem>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq,
+ 1, SSE_INTMUL_ITINS_P>;
}
let Predicates = [HasAVX] in {
@@ -6792,15 +6771,16 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>;
defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>;
}
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u32u8imm:$src3),
@@ -6809,7 +6789,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
OpSize;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
@@ -6820,7 +6800,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
(IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>,
OpSize;
}
@@ -6828,40 +6808,40 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem, 0>, VEX_4V;
+ VR128, loadv4f32, f128mem, 0>, VEX_4V;
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv8f32,
+ int_x86_avx_blend_ps_256, VR256, loadv8f32,
f256mem, 0>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem, 0>, VEX_4V;
+ VR128, loadv2f64, f128mem, 0>, VEX_4V;
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256,VR256, memopv4f64,
+ int_x86_avx_blend_pd_256,VR256, loadv4f64,
f256mem, 0>, VEX_4V, VEX_L;
}
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem, 0>, VEX_4V;
+ VR128, loadv2i64, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem, 0>, VEX_4V;
+ VR128, loadv2i64, i128mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, memopv4f32, f128mem, 0>, VEX_4V;
+ VR128, loadv4f32, f128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, memopv2f64, f128mem, 0>, VEX_4V;
+ VR128, loadv2f64, f128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, memopv8f32, i256mem, 0>, VEX_4V, VEX_L;
+ VR256, loadv8f32, i256mem, 0>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ VR256, loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ VR256, loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
}
}
@@ -6869,21 +6849,27 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem>;
+ VR128, memopv4f32, f128mem,
+ 1, SSE_INTALU_ITINS_P>;
let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem>;
+ VR128, memopv2f64, f128mem,
+ 1, SSE_INTALU_ITINS_P>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem>;
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTALU_ITINS_P>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem>;
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTMUL_ITINS_P>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv4f32, f128mem>;
+ VR128, memopv4f32, f128mem, 1,
+ SSE_DPPS_ITINS>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv2f64, f128mem>;
+ VR128, memopv2f64, f128mem, 1,
+ SSE_DPPD_ITINS>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
@@ -6910,23 +6896,23 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
- memopv2f64, int_x86_sse41_blendvpd>;
+ loadv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
- memopv4f64, int_x86_avx_blendv_pd_256>, VEX_L;
+ loadv4f64, int_x86_avx_blendv_pd_256>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
- memopv4f32, int_x86_sse41_blendvps>;
+ loadv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
- memopv8f32, int_x86_avx_blendv_ps_256>, VEX_L;
+ loadv8f32, int_x86_avx_blendv_ps_256>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv2i64, int_x86_sse41_pblendvb>;
+ loadv2i64, int_x86_sse41_pblendvb>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- memopv4i64, int_x86_avx2_pblendvb>, VEX_L;
+ loadv4i64, int_x86_avx2_pblendvb>, VEX_L;
}
let Predicates = [HasAVX] in {
@@ -6979,7 +6965,7 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
- (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+ (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
(imm:$mask))),
(VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
@@ -6988,13 +6974,14 @@ let Predicates = [HasAVX2] in {
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- X86MemOperand x86memop, Intrinsic IntId> {
+ X86MemOperand x86memop, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
- OpSize;
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
+ itins.rr>, OpSize;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src2),
@@ -7002,7 +6989,8 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
+ (bitconvert (mem_frag addr:$src2)), XMM0))],
+ itins.rm>, OpSize;
}
}
@@ -7099,11 +7087,11 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
@@ -7263,70 +7251,100 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
// crc intrinsic instruction
// This set of instructions is only rm; the only difference is the size
// of r and m.
+class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
+ RegisterClass RCIn, SDPatternOperator Int> :
+ SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
+ !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
+ [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>;
+
+class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
+ X86MemOperand x86memop, SDPatternOperator Int> :
+ SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
+ !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
+ [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
+ IIC_CRC32_MEM>;
+
let Constraints = "$src1 = $dst" in {
- def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i8mem:$src2),
- "crc32{b}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_8 GR32:$src1,
- (load addr:$src2)))]>;
- def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR8:$src2),
- "crc32{b}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
- def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i16mem:$src2),
- "crc32{w}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_16 GR32:$src1,
- (load addr:$src2)))]>,
- OpSize;
- def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR16:$src2),
- "crc32{w}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
- OpSize;
- def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "crc32{l}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_32 GR32:$src1,
- (load addr:$src2)))]>;
- def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "crc32{l}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
- def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i8mem:$src2),
- "crc32{b}\t{$src2, $src1|$src1, $src2}",
- [(set GR64:$dst,
- (int_x86_sse42_crc32_64_8 GR64:$src1,
- (load addr:$src2)))]>,
- REX_W;
- def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR8:$src2),
- "crc32{b}\t{$src2, $src1|$src1, $src2}",
- [(set GR64:$dst,
- (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
- REX_W;
- def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "crc32{q}\t{$src2, $src1|$src1, $src2}",
- [(set GR64:$dst,
- (int_x86_sse42_crc32_64_64 GR64:$src1,
- (load addr:$src2)))]>,
- REX_W;
- def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "crc32{q}\t{$src2, $src1|$src1, $src2}",
- [(set GR64:$dst,
- (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
- REX_W;
+ def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
+ int_x86_sse42_crc32_32_8>;
+ def CRC32r32r8 : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
+ int_x86_sse42_crc32_32_8>;
+ def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
+ int_x86_sse42_crc32_32_16>, OpSize;
+ def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
+ int_x86_sse42_crc32_32_16>, OpSize;
+ def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
+ int_x86_sse42_crc32_32_32>;
+ def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
+ int_x86_sse42_crc32_32_32>;
+ def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
+ int_x86_sse42_crc32_64_64>, REX_W;
+ def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
+ int_x86_sse42_crc32_64_64>, REX_W;
+ let hasSideEffects = 0 in {
+ let mayLoad = 1 in
+ def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
+ null_frag>, REX_W;
+ def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
+ null_frag>, REX_W;
+ }
+}
+
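For context, a minimal C sketch (not part of this patch) of how the crc32 definitions above are normally reached from source: the SSE4.2 intrinsics in <nmmintrin.h> lower to the int_x86_sse42_crc32_* intrinsics that SS42I_crc32r/SS42I_crc32m match. The helper names below are hypothetical, and the instruction selections noted in the comments are the expected ones rather than guaranteed.

#include <stddef.h>
#include <stdint.h>
#include <nmmintrin.h>   /* SSE4.2 CRC32 intrinsics; build with -msse4.2 */

/* Hypothetical helper: accumulate a CRC32-C over a byte buffer.
   Each _mm_crc32_u8 call is expected to select CRC32r32r8 (or the
   CRC32r32m8 form when the byte operand is folded from memory). */
static uint32_t crc32c_bytes(uint32_t crc, const uint8_t *p, size_t n) {
  for (size_t i = 0; i < n; ++i)
    crc = _mm_crc32_u8(crc, p[i]);
  return crc;
}

/* 64-bit form, expected to select CRC32r64r64 (the REX_W encoding above). */
static uint64_t crc32c_u64(uint64_t crc, uint64_t v) {
  return _mm_crc32_u64(crc, v);
}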
+//===----------------------------------------------------------------------===//
+// SHA-NI Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
+ bit UsesXMM0 = 0> {
+ def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [!if(UsesXMM0,
+ (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
+ (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;
+
+ def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [!if(UsesXMM0,
+ (set VR128:$dst, (IntId VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
+ (set VR128:$dst, (IntId VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8;
+}
+
+let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
+ def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
+ (i8 imm:$src3)))]>, TA;
+ def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_sha1rnds4 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (i8 imm:$src3)))]>, TA;
+
+ defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>;
+ defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>;
+ defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>;
+
+ let Uses=[XMM0] in
+ defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>;
+
+ defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>;
+ defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>;
}
+// Aliases with explicit %xmm0
+def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
+ (SHA256RNDS2rr VR128:$dst, VR128:$src2)>;
+def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
+ (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>;
+
//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//
@@ -7383,7 +7401,7 @@ let Predicates = [HasAVX, HasAES] in {
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
OpSize, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -7410,7 +7428,7 @@ let Predicates = [HasAVX, HasAES] in {
(ins i128mem:$src1, i8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
OpSize, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
@@ -7441,7 +7459,7 @@ def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
- (memopv2i64 addr:$src2), imm:$src3))]>;
+ (loadv2i64 addr:$src2), imm:$src3))]>;
// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
@@ -7449,13 +7467,15 @@ def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
+ IIC_SSE_PCLMULQDQ_RR>;
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
- (memopv2i64 addr:$src2), imm:$src3))]>;
+ (memopv2i64 addr:$src2), imm:$src3))],
+ IIC_SSE_PCLMULQDQ_RM>;
} // Constraints = "$src1 = $dst"
@@ -7595,11 +7615,11 @@ def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -7623,22 +7643,22 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (bc_v4i32 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (memopv2i64 addr:$src2)),
+ (bc_v16i8 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (memopv2i64 addr:$src2)),
+ (bc_v8i16 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -7670,12 +7690,12 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
(v4f64 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(alignedstore (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTF128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTF128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
}
@@ -7785,15 +7805,15 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
+ loadv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
+ loadv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
+ loadv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
+ loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}
let Predicates = [HasAVX] in {
@@ -7801,15 +7821,15 @@ def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
+def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (loadv4i64 addr:$src1)),
(i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilp (loadv4i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
(VPERMILPDri VR128:$src1, imm:$imm)>;
-def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
+def : Pat<(v2i64 (X86VPermilp (loadv2i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDmi addr:$src1, imm:$imm)>;
}
@@ -7825,7 +7845,7 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv8f32 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L;
}
@@ -7833,7 +7853,7 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
- (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (loadv4f64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
}
@@ -7848,16 +7868,16 @@ def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
- (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
- (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (loadv4i64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
- (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v32i8 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
- (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
}
@@ -7901,7 +7921,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
TA, OpSize, VEX;
}
-let Predicates = [HasAVX, HasF16C] in {
+let Predicates = [HasF16C] in {
defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
@@ -7935,9 +7955,9 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, memopv2i64, i128mem>;
+ VR128, loadv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, memopv4i64, i256mem>, VEX_L;
+ VR256, loadv4i64, i256mem>, VEX_L;
}
def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
@@ -8115,9 +8135,9 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
VEX_4V, VEX_L;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
+defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
ValueType OpVT> {
@@ -8137,9 +8157,9 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(i8 imm:$src2))))]>, VEX, VEX_L;
}
-defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64>, VEX_W;
let ExeDomain = SSEPackedDouble in
-defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64>, VEX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
@@ -8152,7 +8172,7 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in {
@@ -8163,13 +8183,13 @@ def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (loadv4i64 addr:$src2)),
(i8 imm:$imm))),
(VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
- (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
-def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)),
(i8 imm:$imm))),
(VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
}
@@ -8208,22 +8228,22 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (bc_v4i32 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (memopv2i64 addr:$src2)),
+ (bc_v16i8 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (memopv2i64 addr:$src2)),
+ (bc_v8i16 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -8262,20 +8282,20 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
(v32i8 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
}
@@ -8333,7 +8353,7 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1,
- (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>,
+ (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
VEX_4V;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -8346,7 +8366,7 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1,
- (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
+ (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
VEX_4V, VEX_L;
}
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 2aa08fa..2b6ee5c 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -20,23 +20,21 @@ multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
}
-let isAsmParserOnly = 1 in {
- defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
- defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
- defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
- defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
- defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
- defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
- defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
- defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
- defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
- defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
- defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
- defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
- defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
- defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
- defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
-}
+defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
+defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
+defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
+defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
+defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
+defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
+defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
+defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
+defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
+defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
+defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
+defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
+defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
+defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
+defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
// Scalar load 2 addr operand instructions
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
@@ -49,12 +47,10 @@ multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, VEX;
}
-let isAsmParserOnly = 1 in {
- defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
- ssmem, sse_load_f32>;
- defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
- sdmem, sse_load_f64>;
-}
+defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
+ ssmem, sse_load_f32>;
+defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
+ sdmem, sse_load_f64>;
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop> {
@@ -66,10 +62,8 @@ multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
}
-let isAsmParserOnly = 1 in {
- defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
- defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
-}
+defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
+defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop> {
@@ -81,12 +75,8 @@ multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX, VEX_L;
}
-let isAsmParserOnly = 1 in {
- defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
- memopv8f32>;
- defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
- memopv4f64>;
-}
+defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, memopv8f32>;
+defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, memopv4f64>;
multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
@@ -107,20 +97,18 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
VEX_4VOp3;
}
-let isAsmParserOnly = 1 in {
- defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
- defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
- defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
- defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
- defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
- defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
- defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
- defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
- defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
- defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
- defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
- defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
-}
+defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
+defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
+defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
+defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
+defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
+defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
+defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
+defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
+defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
+defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
+defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
+defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
@@ -134,12 +122,10 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
(Int (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, VEX;
}
-let isAsmParserOnly = 1 in {
- defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
- defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
- defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
- defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
-}
+defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
+defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
+defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
+defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
// Instruction where second source can be memory, but third must be register
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
@@ -158,20 +144,18 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
VR128:$src3))]>, VEX_4V, VEX_I8IMM;
}
-let isAsmParserOnly = 1 in {
- defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
- defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
- defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
- defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
- defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
- defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
- defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
- defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
- defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
- defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
- defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
- defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
-}
+defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
+defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
+defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
+defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
+defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
+defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
+defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
+defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
+defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
+defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
+defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
+defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
// Instruction where second source can be memory, third must be imm8
multiclass xop4opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
@@ -190,16 +174,14 @@ multiclass xop4opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
imm:$src3))]>, VEX_4V;
}
-let isAsmParserOnly = 1 in {
- defm VPCOMB : xop4opimm<0xCC, "vpcomb", int_x86_xop_vpcomb>;
- defm VPCOMW : xop4opimm<0xCD, "vpcomw", int_x86_xop_vpcomw>;
- defm VPCOMD : xop4opimm<0xCE, "vpcomd", int_x86_xop_vpcomd>;
- defm VPCOMQ : xop4opimm<0xCF, "vpcomq", int_x86_xop_vpcomq>;
- defm VPCOMUB : xop4opimm<0xEC, "vpcomub", int_x86_xop_vpcomub>;
- defm VPCOMUW : xop4opimm<0xED, "vpcomuw", int_x86_xop_vpcomuw>;
- defm VPCOMUD : xop4opimm<0xEE, "vpcomud", int_x86_xop_vpcomud>;
- defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", int_x86_xop_vpcomuq>;
-}
+defm VPCOMB : xop4opimm<0xCC, "vpcomb", int_x86_xop_vpcomb>;
+defm VPCOMW : xop4opimm<0xCD, "vpcomw", int_x86_xop_vpcomw>;
+defm VPCOMD : xop4opimm<0xCE, "vpcomd", int_x86_xop_vpcomd>;
+defm VPCOMQ : xop4opimm<0xCF, "vpcomq", int_x86_xop_vpcomq>;
+defm VPCOMUB : xop4opimm<0xEC, "vpcomub", int_x86_xop_vpcomub>;
+defm VPCOMUW : xop4opimm<0xED, "vpcomuw", int_x86_xop_vpcomuw>;
+defm VPCOMUD : xop4opimm<0xEE, "vpcomud", int_x86_xop_vpcomud>;
+defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", int_x86_xop_vpcomuq>;
// Instruction where either second or third source can be memory
multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
@@ -227,10 +209,8 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
VEX_4V, VEX_I8IMM;
}
-let isAsmParserOnly = 1 in {
- defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
- defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
-}
+defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
+defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
@@ -257,9 +237,7 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
VEX_4V, VEX_I8IMM, VEX_L;
}
-let isAsmParserOnly = 1 in {
- defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
-}
+defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index fc86e1e..e99f2d9 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -127,7 +127,7 @@ extern "C" {
"movaps %xmm6, 96(%rsp)\n"
"movaps %xmm7, 112(%rsp)\n"
// JIT callee
-#ifdef _WIN64
+#if defined(_WIN64) || defined(__CYGWIN__)
"subq $32, %rsp\n"
"movq %rbp, %rcx\n" // Pass prev frame and return address
"movq 8(%rbp), %rdx\n"
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index c7c00b5..6649c82 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -17,6 +17,7 @@
#include "X86COFFMachineModuleInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -34,14 +35,12 @@ namespace {
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
class X86MCInstLower {
MCContext &Ctx;
- Mangler *Mang;
const MachineFunction &MF;
const TargetMachine &TM;
const MCAsmInfo &MAI;
X86AsmPrinter &AsmPrinter;
public:
- X86MCInstLower(Mangler *mang, const MachineFunction &MF,
- X86AsmPrinter &asmprinter);
+ X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
@@ -50,13 +49,16 @@ public:
private:
MachineModuleInfoMachO &getMachOMMI() const;
+ Mangler *getMang() const {
+ return AsmPrinter.Mang;
+ }
};
} // end anonymous namespace
-X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
-: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+: Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()),
MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
@@ -81,7 +83,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
isImplicitlyPrivate = true;
- Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ getMang()->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
} else if (MO.isSymbol()) {
Name += MAI.getGlobalPrefix();
Name += MO.getSymbolName();
@@ -110,7 +112,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -124,7 +126,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -140,7 +142,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
Name.erase(Name.end()-5, Name.end());
@@ -591,18 +593,6 @@ ReSimplify:
case X86::MOVSX64rr32:
SimplifyMOVSX(OutMI);
break;
-
- case X86::MORESTACK_RET:
- OutMI.setOpcode(X86::RET);
- break;
-
- case X86::MORESTACK_RET_RESTORE_R10:
- OutMI.setOpcode(X86::MOV64rr);
- OutMI.addOperand(MCOperand::CreateReg(X86::R10));
- OutMI.addOperand(MCOperand::CreateReg(X86::RAX));
-
- AsmPrinter.OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
- break;
}
}
@@ -685,8 +675,140 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
.addExpr(tlsRef));
}
+static std::pair<StackMaps::Location, MachineInstr::const_mop_iterator>
+parseMemoryOperand(StackMaps::Location::LocationType LocTy, unsigned Size,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE) {
+
+ typedef StackMaps::Location Location;
+
+ assert(std::distance(MOI, MOE) >= 5 && "Too few operands to encode mem op.");
+
+ const MachineOperand &Base = *MOI;
+ const MachineOperand &Scale = *(++MOI);
+ const MachineOperand &Index = *(++MOI);
+ const MachineOperand &Disp = *(++MOI);
+ const MachineOperand &ZeroReg = *(++MOI);
+
+ // Sanity check for supported operand format.
+ assert(Base.isReg() &&
+ Scale.isImm() && Scale.getImm() == 1 &&
+ Index.isReg() && Index.getReg() == 0 &&
+ Disp.isImm() && ZeroReg.isReg() && (ZeroReg.getReg() == 0) &&
+ "Unsupported x86 memory operand sequence.");
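+  // That is, only a plain [BaseReg + Disp] address is expected here; any
+  // scaled-index form would trip the assertion above.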
+ (void)Scale;
+ (void)Index;
+ (void)ZeroReg;
+
+ return std::make_pair(
+ Location(LocTy, Size, Base.getReg(), Disp.getImm()), ++MOI);
+}
+
+std::pair<StackMaps::Location, MachineInstr::const_mop_iterator>
+X86AsmPrinter::stackmapOperandParser(MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ const TargetMachine &TM) {
+
+ typedef StackMaps::Location Location;
+
+ const MachineOperand &MOP = *MOI;
+ assert(!MOP.isRegMask() && (!MOP.isReg() || !MOP.isImplicit()) &&
+ "Register mask and implicit operands should not be processed.");
+
+ if (MOP.isImm()) {
+ // Verify anyregcc
+ // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
+
+ switch (MOP.getImm()) {
+ default: llvm_unreachable("Unrecognized operand type.");
+ case StackMaps::DirectMemRefOp: {
+ unsigned Size = TM.getDataLayout()->getPointerSizeInBits();
+ assert((Size % 8) == 0 && "Need pointer size in bytes.");
+ Size /= 8;
+ return parseMemoryOperand(StackMaps::Location::Direct, Size,
+ llvm::next(MOI), MOE);
+ }
+ case StackMaps::IndirectMemRefOp: {
+ ++MOI;
+ int64_t Size = MOI->getImm();
+ assert(Size > 0 && "Need a valid size for indirect memory locations.");
+ return parseMemoryOperand(StackMaps::Location::Indirect, Size,
+ llvm::next(MOI), MOE);
+ }
+ case StackMaps::ConstantOp: {
+ ++MOI;
+ assert(MOI->isImm() && "Expected constant operand.");
+ int64_t Imm = MOI->getImm();
+ return std::make_pair(
+ Location(Location::Constant, sizeof(int64_t), 0, Imm), ++MOI);
+ }
+ }
+ }
+
+ // Otherwise this is a reg operand. The physical register number will
+ // ultimately be encoded as a DWARF regno. The stack map also records the size
+ // of a spill slot that can hold the register content. (The runtime can
+ // track the actual size of the data type if it needs to.)
+ assert(MOP.isReg() && "Expected register operand here.");
+ assert(TargetRegisterInfo::isPhysicalRegister(MOP.getReg()) &&
+ "Virtreg operands should have been rewritten before now.");
+ const TargetRegisterClass *RC =
+ TM.getRegisterInfo()->getMinimalPhysRegClass(MOP.getReg());
+ assert(!MOP.getSubReg() && "Physical subreg still around.");
+ return std::make_pair(
+ Location(Location::Register, RC->getSize(), MOP.getReg(), 0), ++MOI);
+}
+
+// Lower a stackmap of the form:
+// <id>, <shadowBytes>, ...
+static void LowerSTACKMAP(MCStreamer &OutStreamer,
+ StackMaps &SM,
+ const MachineInstr &MI)
+{
+ unsigned NumNOPBytes = MI.getOperand(1).getImm();
+ SM.recordStackMap(MI);
+ // Emit padding.
+ // FIXME: These nops ensure that the stackmap's shadow is covered by
+ // instructions from the same basic block, but the nops should not be
+ // necessary if instructions from the same block follow the stackmap.
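+  // X86::NOOP is the single-byte 0x90 nop, so the loop below emits exactly
+  // NumNOPBytes bytes of padding.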
+ for (unsigned i = 0; i < NumNOPBytes; ++i)
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::NOOP));
+}
+
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
+static void LowerPATCHPOINT(MCStreamer &OutStreamer,
+ StackMaps &SM,
+ const MachineInstr &MI) {
+ SM.recordPatchPoint(MI);
+
+ PatchPointOpers opers(&MI);
+ unsigned ScratchIdx = opers.getNextScratchIdx();
+ unsigned EncodedBytes = 0;
+ int64_t CallTarget = opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+ if (CallTarget) {
+ // Emit MOV to materialize the target address and the CALL to target.
+ // This is encoded with 12-13 bytes, depending on which register is used.
+  // We conservatively assume that it is 12 bytes and, in the worst case,
+  // emit one extra NOP byte.
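+  // (MOV64ri with a 64-bit immediate is 10 bytes: REX.W, opcode, imm64;
+  // CALL64r is 2 bytes, or 3 when the scratch register needs a REX prefix.)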
+ EncodedBytes = 12;
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::MOV64ri)
+ .addReg(MI.getOperand(ScratchIdx).getReg())
+ .addImm(CallTarget));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::CALL64r)
+ .addReg(MI.getOperand(ScratchIdx).getReg()));
+ }
+ // Emit padding.
+ unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ assert(NumBytes >= EncodedBytes &&
+ "Patchpoint can't request size less than the length of a call.");
+
+ for (unsigned i = EncodedBytes; i < NumBytes; ++i)
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::NOOP));
+}
+
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- X86MCInstLower MCInstLowering(Mang, *MF, *this);
+ X86MCInstLower MCInstLowering(*MF, *this);
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
@@ -774,6 +896,24 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(DotExpr));
return;
}
+
+ case TargetOpcode::STACKMAP:
+ return LowerSTACKMAP(OutStreamer, SM, *MI);
+
+ case TargetOpcode::PATCHPOINT:
+ return LowerPATCHPOINT(OutStreamer, SM, *MI);
+
+ case X86::MORESTACK_RET:
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
+ return;
+
+ case X86::MORESTACK_RET_RESTORE_R10:
+ // Return, then restore R10.
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(X86::R10)
+ .addReg(X86::RAX));
+ return;
}
MCInst TmpInst;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 0923310..dbda556 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -101,8 +101,8 @@ int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
bool
X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- // Only enable when post-RA scheduling is enabled and this is needed.
- return TM.getSubtargetImpl()->postRAScheduler();
+ // ExeDepsFixer and PostRAScheduler require liveness.
+ return true;
}
int
@@ -239,6 +239,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
case CallingConv::HiPE:
return CSR_NoRegs_SaveList;
+ case CallingConv::WebKit_JS:
+ return CSR_64_SaveList;
+ case CallingConv::AnyReg:
+ return CSR_MostRegs_64_SaveList;
+
case CallingConv::Intel_OCL_BI: {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
@@ -296,6 +301,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
return CSR_NoRegs_RegMask;
+ if (CC == CallingConv::WebKit_JS || CC == CallingConv::AnyReg)
+ return CSR_MostRegs_64_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
if (CC == CallingConv::Cold)
@@ -512,14 +519,6 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
-unsigned X86RegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned X86RegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
-
namespace llvm {
unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
bool High) {
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index fb17682..22251b2 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -126,10 +126,6 @@ public:
unsigned getBaseRegister() const { return BasePtr; }
// FIXME: Move to FrameInfo
unsigned getSlotSize() const { return SlotSize; }
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
};
// getX86SubSuperRegister - X86 utility function. It returns the sub or super
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 62ba2bc..9748261 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -19,6 +19,10 @@ def HaswellModel : SchedMachineModel {
let MicroOpBufferSize = 192; // Based on the reorder buffer.
let LoadLatency = 4;
let MispredictPenalty = 16;
+
+ // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
+ let CompleteModel = 0;
}
let SchedModel = HaswellModel in {
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 52ead94..3011c6d 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -20,6 +20,10 @@ def SandyBridgeModel : SchedMachineModel {
let MicroOpBufferSize = 168; // Based on the reorder buffer.
let LoadLatency = 4;
let MispredictPenalty = 16;
+
+ // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
+ let CompleteModel = 0;
}
let SchedModel = SandyBridgeModel in {
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index ceb2e05..0556437 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -141,9 +141,12 @@ def IIC_IDIV64 : InstrItinClass;
// neg/not/inc/dec
def IIC_UNARY_REG : InstrItinClass;
def IIC_UNARY_MEM : InstrItinClass;
-// add/sub/and/or/xor/adc/sbc/cmp/test
+// add/sub/and/or/xor/cmp/test
def IIC_BIN_MEM : InstrItinClass;
def IIC_BIN_NONMEM : InstrItinClass;
+// adc/sbb
+def IIC_BIN_CARRY_MEM : InstrItinClass;
+def IIC_BIN_CARRY_NONMEM : InstrItinClass;
// shift/rotate
def IIC_SR : InstrItinClass;
// shift double
@@ -250,11 +253,11 @@ def IIC_SSE_INTSH_P_RR : InstrItinClass;
def IIC_SSE_INTSH_P_RM : InstrItinClass;
def IIC_SSE_INTSH_P_RI : InstrItinClass;
-def IIC_SSE_CMPP_RR : InstrItinClass;
-def IIC_SSE_CMPP_RM : InstrItinClass;
+def IIC_SSE_INTSHDQ_P_RI : InstrItinClass;
def IIC_SSE_SHUFP : InstrItinClass;
-def IIC_SSE_PSHUF : InstrItinClass;
+def IIC_SSE_PSHUF_RI : InstrItinClass;
+def IIC_SSE_PSHUF_MI : InstrItinClass;
def IIC_SSE_UNPCK : InstrItinClass;
@@ -316,7 +319,8 @@ def IIC_SSE_PSIGN_RM : InstrItinClass;
def IIC_SSE_PMADD : InstrItinClass;
def IIC_SSE_PMULHRSW : InstrItinClass;
-def IIC_SSE_PALIGNR : InstrItinClass;
+def IIC_SSE_PALIGNRR : InstrItinClass;
+def IIC_SSE_PALIGNRM : InstrItinClass;
def IIC_SSE_MWAIT : InstrItinClass;
def IIC_SSE_MONITOR : InstrItinClass;
@@ -492,8 +496,8 @@ def IIC_PUSH_REG : InstrItinClass;
def IIC_PUSH_F : InstrItinClass;
def IIC_PUSH_A : InstrItinClass;
def IIC_BSWAP : InstrItinClass;
-def IIC_BSF : InstrItinClass;
-def IIC_BSR : InstrItinClass;
+def IIC_BIT_SCAN_MEM : InstrItinClass;
+def IIC_BIT_SCAN_REG : InstrItinClass;
def IIC_MOVS : InstrItinClass;
def IIC_STOS : InstrItinClass;
def IIC_SCAS : InstrItinClass;
@@ -540,6 +544,33 @@ def IIC_BOUND : InstrItinClass;
def IIC_ARPL_REG : InstrItinClass;
def IIC_ARPL_MEM : InstrItinClass;
def IIC_MOVBE : InstrItinClass;
+def IIC_AES : InstrItinClass;
+def IIC_BLEND_MEM : InstrItinClass;
+def IIC_BLEND_NOMEM : InstrItinClass;
+def IIC_CBW : InstrItinClass;
+def IIC_CRC32_REG : InstrItinClass;
+def IIC_CRC32_MEM : InstrItinClass;
+def IIC_SSE_DPPD_RR : InstrItinClass;
+def IIC_SSE_DPPD_RM : InstrItinClass;
+def IIC_SSE_DPPS_RR : InstrItinClass;
+def IIC_SSE_DPPS_RM : InstrItinClass;
+def IIC_MMX_EMMS : InstrItinClass;
+def IIC_SSE_EXTRACTPS_RR : InstrItinClass;
+def IIC_SSE_EXTRACTPS_RM : InstrItinClass;
+def IIC_SSE_INSERTPS_RR : InstrItinClass;
+def IIC_SSE_INSERTPS_RM : InstrItinClass;
+def IIC_SSE_MPSADBW_RR : InstrItinClass;
+def IIC_SSE_MPSADBW_RM : InstrItinClass;
+def IIC_SSE_PMULLD_RR : InstrItinClass;
+def IIC_SSE_PMULLD_RM : InstrItinClass;
+def IIC_SSE_ROUNDPS_REG : InstrItinClass;
+def IIC_SSE_ROUNDPS_MEM : InstrItinClass;
+def IIC_SSE_ROUNDPD_REG : InstrItinClass;
+def IIC_SSE_ROUNDPD_MEM : InstrItinClass;
+def IIC_SSE_POPCNT_RR : InstrItinClass;
+def IIC_SSE_POPCNT_RM : InstrItinClass;
+def IIC_SSE_PCLMULQDQ_RR : InstrItinClass;
+def IIC_SSE_PCLMULQDQ_RM : InstrItinClass;
def IIC_NOP : InstrItinClass;
@@ -561,7 +592,7 @@ def IIC_NOP : InstrItinClass;
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
-// The GenericModel contains no instruciton itineraries.
+// The GenericModel contains no instruction itineraries.
def GenericModel : SchedMachineModel {
let IssueWidth = 4;
let MicroOpBufferSize = 32;
@@ -572,3 +603,4 @@ def GenericModel : SchedMachineModel {
include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
+include "X86ScheduleSLM.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 14a1471..ba72f29 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the itinerary class data for the Intel Atom (Bonnell)
-// processors.
+// This file defines the itinerary class data for the Intel Atom
+// in-order (Saltwell-32nm/Bonnell-45nm) processors.
//
//===----------------------------------------------------------------------===//
@@ -79,9 +79,12 @@ def AtomItineraries : ProcessorItineraries<
// neg/not/inc/dec
InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
- // add/sub/and/or/xor/adc/sbc/cmp/test
+ // add/sub/and/or/xor/cmp/test
InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
+  // adc/sbb
+ InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<1, [Port0]>] >,
// shift/rotate
InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
// shift double
@@ -203,11 +206,11 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CMPP_RR, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CMPP_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_PSHUF, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
@@ -278,7 +281,8 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_PALIGNR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
@@ -470,8 +474,8 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_PUSH_A, [InstrStage<8, [Port0, Port1]>] >,
InstrItinData<IIC_BSWAP, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_BSF, [InstrStage<16, [Port0, Port1]>] >,
- InstrItinData<IIC_BSR, [InstrStage<16, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<16, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<16, [Port0, Port1]>] >,
InstrItinData<IIC_MOVS, [InstrStage<3, [Port0, Port1]>] >,
InstrItinData<IIC_STOS, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_SCAS, [InstrStage<2, [Port0, Port1]>] >,
@@ -518,6 +522,8 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_ARPL_REG, [InstrStage<24, [Port0, Port1]>] >,
InstrItinData<IIC_ARPL_MEM, [InstrStage<23, [Port0, Port1]>] >,
InstrItinData<IIC_MOVBE, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CBW, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_EMMS, [InstrStage<5, [Port0, Port1]>] >,
InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
]>;
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
new file mode 100644
index 0000000..6c2a304
--- /dev/null
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -0,0 +1,668 @@
+//===- X86ScheduleSLM.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Intel Atom
+// (Silvermont) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def IEC_RSV0 : FuncUnit;
+def IEC_RSV1 : FuncUnit;
+def FPC_RSV0 : FuncUnit;
+def FPC_RSV1 : FuncUnit;
+def MEC_RSV : FuncUnit;
+
+def SLMItineraries : ProcessorItineraries<
+ [ IEC_RSV0, IEC_RSV1, FPC_RSV0, FPC_RSV1, MEC_RSV ],
+ [], [
+ // [InstrStage<N, [FPC_RSV0, FPC_RSV1]>]
+ // [InstrStage<N, [FPC_RSV0, FPC_RSV1], 0>, InstrStage<N, [MEC_RSV]>]
+ // [InstrStage<N, [IEC_RSV0, IEC_RSV1]>]
+ // [InstrStage<N, [IEC_RSV0, IEC_RSV1], 0>,InstrStage<N,[MEC_RSV]>]
+ //
+ // Default is 1 cycle, IEC_RSV0 or IEC_RSV1
+ //InstrItinData<IIC_DEFAULT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_ALU_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LEA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LEA_16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // mul
+ InstrItinData<IIC_MUL8, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MUL16_MEM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_MUL16_REG, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MUL32_MEM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_MUL32_REG, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MUL64, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ // imul by al, ax, eax, rax
+ InstrItinData<IIC_IMUL8, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL16_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL16_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL32_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL64, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ // imul reg by reg|mem
+ InstrItinData<IIC_IMUL16_RM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL16_RR, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL32_RM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL32_RR, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL64_RM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL64_RR, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ // imul reg = reg/mem * imm
+ InstrItinData<IIC_IMUL16_RRI, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL32_RRI, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL64_RRI, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL16_RMI, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL32_RMI, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL64_RMI, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ // idiv - min latency
+ InstrItinData<IIC_IDIV8, [InstrStage<34, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IDIV16, [InstrStage<35, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IDIV32, [InstrStage<35, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IDIV64, [InstrStage<49, [IEC_RSV0, IEC_RSV1]>] >,
+ // div - min latency
+ InstrItinData<IIC_DIV8_REG, [InstrStage<25, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DIV8_MEM, [InstrStage<25, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<25, [MEC_RSV]>] >,
+ InstrItinData<IIC_DIV16, [InstrStage<26, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DIV32, [InstrStage<26, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DIV64, [InstrStage<38, [IEC_RSV0, IEC_RSV1]>] >,
+ // neg/not/inc/dec
+ InstrItinData<IIC_UNARY_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ // add/sub/and/or/xor/adc/sbc/cmp/test
+ InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BIN_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ // adc/sbb
+ InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ // shift/rotate
+ InstrItinData<IIC_SR, [InstrStage<1, [IEC_RSV0], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ // shift double
+ InstrItinData<IIC_SHD16_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD16_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD32_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD64_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD64_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ // cmov
+ InstrItinData<IIC_CMOV16_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMOV16_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMOV32_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMOV32_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMOV64_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMOV64_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
+ // set
+ InstrItinData<IIC_SET_M, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SET_R, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // jcc
+ InstrItinData<IIC_Jcc, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // jcxz/jecxz/jrcxz
+ InstrItinData<IIC_JCXZ, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // jmp rel
+ InstrItinData<IIC_JMP_REL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // jmp indirect
+ InstrItinData<IIC_JMP_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_JMP_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ // jmp far
+ InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // loop/loope/loopne
+ InstrItinData<IIC_LOOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LOOPE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LOOPNE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // call - all but reg/imm
+ InstrItinData<IIC_CALL_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CALL_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ //ret
+ InstrItinData<IIC_RET, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RET_IMM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ //sign extension movs
+ InstrItinData<IIC_MOVSX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_MOVSX_R16_R16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVSX_R32_R32, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ //zero extension movs
+ InstrItinData<IIC_MOVZX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_REP_MOVS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_REP_STOS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ // SSE binary operations
+ // arithmetic fp scalar
+ InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<2, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<2, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<13, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<13, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<13, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<13, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<13, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<13, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<6, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<6, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+
+ // arithmetic fp parallel
+ InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<2, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<2, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<27, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<27, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<27, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<27, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<27, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<27, [MEC_RSV]>] >,
+
+ // bitwise parallel
+ InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ // arithmetic int parallel
+ InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+
+ // multiply int parallel
+ InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [FPC_RSV0], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+
+ // shift parallel
+ InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<2, [FPC_RSV0], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [FPC_RSV0]>] >,
+
+ InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [FPC_RSV0]>] >,
+
+ InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [FPC_RSV0], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [FPC_RSV0]>] >,
+
+ InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<26, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<26, [FPC_RSV0], 0>,
+ InstrStage<26, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<13, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<13, [FPC_RSV0], 0>,
+ InstrStage<13, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<26, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<26, [FPC_RSV0], 0>,
+ InstrStage<26, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<13, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<13, [FPC_RSV0], 0>,
+ InstrStage<13, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<9, [FPC_RSV0], 0>,
+ InstrStage<9, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [FPC_RSV0], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOVMSK, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MASKMOV, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_LDDQU, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PAUSE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_STMXCSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<6, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<6, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<9, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<9, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<9, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<9, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MWAIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MONITOR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ // conversions
+ // to/from PD ...
+ InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ // to/from PS except to/from PD and PS2PI
+ InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+
+ // MMX MOVs
+ InstrItinData<IIC_MMX_MOV_MM_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // other MMX
+ InstrItinData<IIC_MMX_ALU_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_ALU_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PMUL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PSADBW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PCK_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PCK_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PSHUF, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PEXTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PINSRW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // conversions
+ // from/to PD
+ InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // from/to PI
+ InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_CMPX_LOCK, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XADD_LOCK_MEM8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_FILD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FLD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FLD80, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_FST, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FST80, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FIST, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_FLDZ, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FUCOM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FUCOMI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FCOMI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNSTSW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNSTCW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FLDCW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNINIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FFREE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNCLEX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_WAIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FXAM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FLDL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_F2XM1, [InstrStage<88, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FYL2X, [InstrStage<296, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FPTAN, [InstrStage<281, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FPATAN, [InstrStage<296, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FXTRACT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FPREM1, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FPSTP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FPREM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FYL2XP1, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FSINCOS, [InstrStage<281, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FRNDINT, [InstrStage<25, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FSCALE, [InstrStage<74, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FCOMPP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FXSAVE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FXRSTOR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FXCH, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ // System instructions
+ InstrItinData<IIC_CPUID, [InstrStage<60, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INT3, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INVD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INVLPG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IRET, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_HLT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LXS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RDTSC, [InstrStage<30, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RSM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SIDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SGDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SLDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SWAPGS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SYSCALL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_IN_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IN_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_OUT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_OUT_IR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_MOV_REG_DR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_DR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // worst case for mov REG_CRx
+ InstrItinData<IIC_MOV_REG_CR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_CR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_MEM_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_SR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_SR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // LAR
+ InstrItinData<IIC_LAR_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LAR_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // LSL
+ InstrItinData<IIC_LSL_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LSL_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_LGDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LIDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LLDT_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LLDT_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // push CS and other segment registers
+ InstrItinData<IIC_PUSH_CS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // pop segment registers
+ InstrItinData<IIC_POP_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_SR_SS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // VERR, VERW
+ InstrItinData<IIC_VERR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_VERW_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_VERW_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // WRMSR, RDMSR
+ InstrItinData<IIC_WRMSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RDMSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RDPMC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // SMSW, LMSW
+ InstrItinData<IIC_SMSW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LMSW_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LMSW_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_ENTER, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LEAVE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_POP_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_REG16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_F, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_FD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_A, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_F, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_A, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_BSWAP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<10, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<10, [MEC_RSV]>] >,
+ InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<10, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STOS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SCAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_AHF, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BT_MI, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_BT_MR, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_BT_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BTX_MI, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_BTX_MR, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_BTX_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BTX_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XCHG_REG, [InstrStage<5, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XCHG_MEM, [InstrStage<5, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ InstrItinData<IIC_XADD_REG, [InstrStage<5, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XADD_MEM, [InstrStage<5, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPXCHG_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMPXCHG_8B, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPXCHG_16B, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LODS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_OUTS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CLC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CLD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CLI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CLTS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XLAT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AAA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AAD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AAM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DAA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BOUND, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_ARPL_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_ARPL_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_MOVBE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AES, [InstrStage<8, [FPC_RSV0]>] >,
+ InstrItinData<IIC_BLEND_NOMEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_BLEND_MEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<10, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<10, [MEC_RSV]>] >,
+ InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<10, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CBW, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CRC32_REG, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CRC32_MEM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DPPD_RR, [InstrStage<12, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DPPD_RM, [InstrStage<12, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<12, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DPPS_RR, [InstrStage<15, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DPPS_RM, [InstrStage<15, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<15, [MEC_RSV]>] >,
+ InstrItinData<IIC_MMX_EMMS, [InstrStage<10, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_EXTRACTPS_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_EXTRACTPS_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_INSERTPS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_INSERTPS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MPSADBW_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MPSADBW_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PMULLD_RR, [InstrStage<11, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PMULLD_RM, [InstrStage<11, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<11, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_ROUNDPS_REG, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ROUNDPS_MEM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_ROUNDPD_REG, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ROUNDPD_MEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_POPCNT_RR, [InstrStage<4, [IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_POPCNT_RM, [InstrStage<4, [IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PCLMULQDQ_RR, [InstrStage<10, [IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PCLMULQDQ_RM, [InstrStage<10, [IEC_RSV1], 0>,
+ InstrStage<10, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_NOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >
+ ]>;
+
+// Silvermont machine model.
+def SLMModel : SchedMachineModel {
+ let IssueWidth = 2; // Allows 2 instructions per scheduling group.
+ let MinLatency = 1; // InstrStage cycles override MinLatency.
+ // OperandCycles may be used for expected latency.
+ let LoadLatency = 3; // Expected cycles, may be overridden by OperandCycles.
+ let HighLatency = 30; // Expected, may be overridden by OperandCycles.
+
+ let Itineraries = SLMItineraries;
+}
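
A model defined this way only takes effect once a CPU definition in X86.td selects it. A minimal TableGen sketch of that hookup (ProcessorModel comes from Target.td; the processor name and feature list below are illustrative assumptions, not taken from this patch):

  // Hypothetical CPU definition wiring up the new Silvermont model; the
  // itineraries above are picked up automatically through SLMModel.Itineraries.
  def : ProcessorModel<"slm", SLMModel, [FeatureSSE42, FeatureMOVBE,
                                         FeaturePOPCNT]>;
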
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index d1db79f..b9c620f 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -46,8 +46,6 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
!ConstantSize ||
ConstantSize->getZExtValue() >
Subtarget->getMaxInlineSizeThreshold()) {
- SDValue InFlag(0, 0);
-
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index fae90f2..01353b2 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -276,20 +276,29 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
(Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
(Family == 6 && Model == 0x2A) || // SandyBridge
(Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
- (Family == 6 && Model == 0x3A))) {// IvyBridge
+ (Family == 6 && Model == 0x3A) || // IvyBridge
+ (Family == 6 && Model == 0x3E) || // IvyBridge EP
+ (Family == 6 && Model == 0x3C) || // Haswell
+ (Family == 6 && Model == 0x3F) || // ...
+ (Family == 6 && Model == 0x45) || // ...
+ (Family == 6 && Model == 0x46))) { // ...
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
- // Set processor type. Currently only Atom is detected.
+ // Set processor type. Currently only Atom or Silvermont (SLM) is detected.
if (Family == 6 &&
- (Model == 28 || Model == 38 || Model == 39
- || Model == 53 || Model == 54)) {
+ (Model == 28 || Model == 38 || Model == 39 ||
+ Model == 53 || Model == 54)) {
X86ProcFamily = IntelAtom;
UseLeaForSP = true;
ToggleFeature(X86::FeatureLeaForSP);
}
+ else if (Family == 6 &&
+ (Model == 55 || Model == 74 || Model == 77)) {
+ X86ProcFamily = IntelSLM;
+ }
unsigned MaxExtLevel;
X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -351,14 +360,38 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasRTM = true;
ToggleFeature(X86::FeatureRTM);
}
- if (IsIntel && ((EBX >> 19) & 0x1)) {
- HasADX = true;
- ToggleFeature(X86::FeatureADX);
+ if (IsIntel && ((EBX >> 16) & 0x1)) {
+ X86SSELevel = AVX512F;
+ ToggleFeature(X86::FeatureAVX512);
}
if (IsIntel && ((EBX >> 18) & 0x1)) {
HasRDSEED = true;
ToggleFeature(X86::FeatureRDSEED);
}
+ if (IsIntel && ((EBX >> 19) & 0x1)) {
+ HasADX = true;
+ ToggleFeature(X86::FeatureADX);
+ }
+ if (IsIntel && ((EBX >> 26) & 0x1)) {
+ HasPFI = true;
+ ToggleFeature(X86::FeaturePFI);
+ }
+ if (IsIntel && ((EBX >> 27) & 0x1)) {
+ HasERI = true;
+ ToggleFeature(X86::FeatureERI);
+ }
+ if (IsIntel && ((EBX >> 28) & 0x1)) {
+ HasCDI = true;
+ ToggleFeature(X86::FeatureCDI);
+ }
+ if (IsIntel && ((EBX >> 29) & 0x1)) {
+ HasSHA = true;
+ ToggleFeature(X86::FeatureSHA);
+ }
+ }
+ if (IsAMD && ((ECX >> 21) & 0x1)) {
+ HasTBM = true;
+ ToggleFeature(X86::FeatureTBM);
}
}
}
@@ -416,8 +449,8 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Make sure 64-bit features are available in 64-bit mode.
if (In64BitMode) {
- HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
- HasCMov = true; ToggleFeature(X86::FeatureCMOV);
+ if (!HasX86_64) { HasX86_64 = true; ToggleFeature(X86::Feature64Bit); }
+ if (!HasCMov) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
if (X86SSELevel < SSE2) {
X86SSELevel = SSE2;
@@ -429,9 +462,9 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// CPUName may have been set by the CPU detection code. Make sure the
// new MCSchedModel is used.
- InitMCProcessorInfo(CPUName, FS);
+ InitCPUSchedModel(CPUName);
- if (X86ProcFamily == IntelAtom)
+ if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
PostRAScheduler = true;
InstrItins = getInstrItineraryForCPU(CPUName);
@@ -468,6 +501,7 @@ void X86Subtarget::initializeEnvironment() {
HasFMA = false;
HasFMA4 = false;
HasXOP = false;
+ HasTBM = false;
HasMOVBE = false;
HasRDRAND = false;
HasF16C = false;
@@ -479,8 +513,9 @@ void X86Subtarget::initializeEnvironment() {
HasHLE = false;
HasERI = false;
HasCDI = false;
- HasPFI=false;
+ HasPFI = false;
HasADX = false;
+ HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
IsBTMemSlow = false;
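
The new detection code above tests one CPUID bit per feature. A compressed C++ restatement of the Intel leaf-7 checks, purely as an illustration of the pattern and not part of the patch (it assumes EBX already holds CPUID.(EAX=07H,ECX=0):EBX as queried earlier in AutoDetectSubtargetFeatures(), and that IsIntel is set as in the surrounding code):

  // Map each CPUID.(EAX=07H,ECX=0):EBX bit to its subtarget flag and feature.
  struct Leaf7Feature { unsigned Bit; bool *Flag; uint64_t Feature; };
  Leaf7Feature IntelEBXBits[] = {
    { 18, &HasRDSEED, X86::FeatureRDSEED }, // RDSEED
    { 19, &HasADX,    X86::FeatureADX    }, // ADX
    { 26, &HasPFI,    X86::FeaturePFI    }, // AVX-512 prefetch
    { 27, &HasERI,    X86::FeatureERI    }, // AVX-512 exponential/reciprocal
    { 28, &HasCDI,    X86::FeatureCDI    }, // AVX-512 conflict detection
    { 29, &HasSHA,    X86::FeatureSHA    }, // SHA extensions
  };
  for (unsigned i = 0, e = array_lengthof(IntelEBXBits); i != e; ++i) {
    if (IsIntel && ((EBX >> IntelEBXBits[i].Bit) & 0x1)) {
      *IntelEBXBits[i].Flag = true;
      ToggleFeature(IntelEBXBits[i].Feature);
    }
  }

AVX-512 Foundation (EBX bit 16) is handled separately above because it raises X86SSELevel rather than setting a boolean flag.
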
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8793238..dd8c081 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -42,7 +42,7 @@ enum Style {
class X86Subtarget : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
- NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
};
enum X863DNowEnum {
@@ -50,7 +50,7 @@ protected:
};
enum X86ProcFamilyEnum {
- Others, IntelAtom
+ Others, IntelAtom, IntelSLM
};
/// X86ProcFamily - X86 processor family: Intel Atom, and others
@@ -97,6 +97,9 @@ protected:
/// HasXOP - Target has XOP instructions
bool HasXOP;
+ /// HasTBM - Target has TBM instructions.
+ bool HasTBM;
+
/// HasMOVBE - True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -127,6 +130,9 @@ protected:
/// HasADX - Processor has ADX instructions.
bool HasADX;
+ /// HasSHA - Processor has SHA instructions.
+ bool HasSHA;
+
/// HasPRFCHW - Processor has PRFCHW instructions.
bool HasPRFCHW;
@@ -258,7 +264,7 @@ public:
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
- bool hasAVX512() const { return X86SSELevel >= AVX512; }
+ bool hasAVX512() const { return X86SSELevel >= AVX512F; }
bool hasFp256() const { return hasAVX(); }
bool hasInt256() const { return hasAVX2(); }
bool hasSSE4A() const { return HasSSE4A; }
@@ -271,6 +277,7 @@ public:
// FIXME: Favor FMA when both are enabled. Is this the right thing to do?
bool hasFMA4() const { return HasFMA4 && !HasFMA; }
bool hasXOP() const { return HasXOP; }
+ bool hasTBM() const { return HasTBM; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
@@ -281,6 +288,7 @@ public:
bool hasRTM() const { return HasRTM; }
bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
+ bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
bool isBTMemSlow() const { return IsBTMemSlow; }
@@ -311,10 +319,8 @@ public:
return (TargetTriple.getEnvironment() == Triple::ELF ||
TargetTriple.isOSBinFormatELF());
}
- bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
- bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NaCl;
- }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
@@ -327,15 +333,14 @@ public:
}
bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); }
+ bool isOSWindows() const { return TargetTriple.isOSWindows(); }
+
bool isTargetWin64() const {
- // FIXME: x86_64-cygwin has not been released yet.
return In64BitMode && TargetTriple.isOSWindows();
}
bool isTargetWin32() const {
- // FIXME: Cygwin is included for isTargetWin64 -- should it be included
- // here too?
- return !In64BitMode && (isTargetMingw() || isTargetWindows());
+ return !In64BitMode && (isTargetCygMing() || isTargetWindows());
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
@@ -380,11 +385,14 @@ public:
/// memset with zero passed as the second argument. Otherwise it
/// returns null.
const char *getBZeroEntry() const;
-
+
/// This function returns true if the target has sincos() routine in its
/// compiler runtime or math libraries.
bool hasSinCos() const;
+ /// Enable the MachineScheduler pass for all X86 subtargets.
+ bool enableMachineScheduler() const LLVM_OVERRIDE { return true; }
+
/// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 49ebd1a..ddf580f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -92,7 +92,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
} else if (Subtarget.is64Bit()) {
// PIC in 64 bit mode is always rip-rel.
Subtarget.setPICStyle(PICStyles::RIPRel);
- } else if (Subtarget.isTargetCygMing()) {
+ } else if (Subtarget.isTargetCOFF()) {
Subtarget.setPICStyle(PICStyles::None);
} else if (Subtarget.isTargetDarwin()) {
if (getRelocationModel() == Reloc::PIC_)
@@ -114,14 +114,14 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
// Command line options for x86
//===----------------------------------------------------------------------===//
static cl::opt<bool>
-UseVZeroUpper("x86-use-vzeroupper",
+UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
cl::desc("Minimize AVX to SSE transition penalty"),
cl::init(true));
// Temporary option to control early if-conversion for x86 while adding machine
// models.
static cl::opt<bool>
-X86EarlyIfConv("x86-early-ifcvt",
+X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
cl::desc("Enable early if-conversion on X86"));
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index a19c5a6..086cd4d 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -25,7 +25,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
// On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
// is an indirect pc-relative reference.
if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
- const MCSymbol *Sym = Mang->getSymbol(GV);
+ const MCSymbol *Sym = getSymbol(*Mang, GV);
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
const MCExpr *Four = MCConstantExpr::Create(4, getContext());
@@ -39,7 +39,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
MCSymbol *X86_64MachoTargetObjectFile::
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
- return Mang->getSymbol(GV);
+ return getSymbol(*Mang, GV);
}
void
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 3bbddad..f88a666 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -101,6 +101,9 @@ public:
unsigned AddressSpace) const;
virtual unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex) const;
+
+ virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwiseForm) const;
/// @}
};
@@ -127,8 +130,8 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
// TODO: Currently the __builtin_popcount() implementation using SSE3
// instructions is inefficient. Once the problem is fixed, we should
- // call ST->hasSSE3() instead of ST->hasSSE4().
- return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
+ // call ST->hasSSE3() instead of ST->hasPOPCNT().
+ return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
}
unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
@@ -174,7 +177,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const CostTblEntry<MVT> AVX2CostTable[] = {
+ static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
{ ISD::SHL, MVT::v4i32, 1 },
@@ -211,13 +214,13 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
- int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
- ISD, LT.second);
+ int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * AVX2CostTable[Idx].Cost;
}
- static const CostTblEntry<MVT> SSE2UniformConstCostTable[] = {
+ static const CostTblEntry<MVT::SimpleValueType>
+ SSE2UniformConstCostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
// Constant splats are cheaper for the following instructions.
@@ -238,15 +241,13 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
ST->hasSSE2()) {
- int Idx = CostTableLookup<MVT>(SSE2UniformConstCostTable,
- array_lengthof(SSE2UniformConstCostTable),
- ISD, LT.second);
+ int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * SSE2UniformConstCostTable[Idx].Cost;
}
- static const CostTblEntry<MVT> SSE2CostTable[] = {
+ static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
// For some cases, where the shift amount is a scalar we would be able
@@ -287,13 +288,12 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
};
if (ST->hasSSE2()) {
- int Idx = CostTableLookup<MVT>(SSE2CostTable, array_lengthof(SSE2CostTable),
- ISD, LT.second);
+ int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * SSE2CostTable[Idx].Cost;
}
- static const CostTblEntry<MVT> AVX1CostTable[] = {
+ static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
@@ -312,21 +312,19 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// Look for AVX1 lowering tricks.
if (ST->hasAVX() && !ST->hasAVX2()) {
- int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable),
- ISD, LT.second);
+ int Idx = CostTableLookup(AVX1CostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * AVX1CostTable[Idx].Cost;
}
// Custom lowering of vectors.
- static const CostTblEntry<MVT> CustomLowered[] = {
+ static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
// A v2i64/v4i64 and multiply is custom lowered as a series of long
// multiplies(3), shifts(4) and adds(2).
{ ISD::MUL, MVT::v2i64, 9 },
{ ISD::MUL, MVT::v4i64, 9 },
};
- int Idx = CostTableLookup<MVT>(CustomLowered, array_lengthof(CustomLowered),
- ISD, LT.second);
+ int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
if (Idx != -1)
return LT.first * CustomLowered[Idx].Cost;
@@ -363,7 +361,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
- static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ SSE2ConvTbl[] = {
// These are somewhat magic numbers justified by looking at the output of
// Intel's IACA, running some kernels and making sure when we take
// legalization into account the throughput will be overestimated.
@@ -387,9 +386,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
};
if (ST->hasSSE2() && !ST->hasAVX()) {
- int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl,
- array_lengthof(SSE2ConvTbl),
- ISD, LTDest.second, LTSrc.second);
+ int Idx =
+ ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
if (Idx != -1)
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
}
@@ -401,13 +399,17 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
if (!SrcTy.isSimple() || !DstTy.isSimple())
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
- static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ AVXConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
@@ -446,9 +448,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
};
if (ST->hasAVX()) {
- int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl,
- array_lengthof(AVXConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
if (Idx != -1)
return AVXConversionTbl[Idx].Cost;
}
@@ -466,7 +467,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const CostTblEntry<MVT> SSE42CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
{ ISD::SETCC, MVT::v2f64, 1 },
{ ISD::SETCC, MVT::v4f32, 1 },
{ ISD::SETCC, MVT::v2i64, 1 },
@@ -475,7 +476,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v16i8, 1 },
};
- static const CostTblEntry<MVT> AVX1CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
{ ISD::SETCC, MVT::v4f64, 1 },
{ ISD::SETCC, MVT::v8f32, 1 },
// AVX1 does not support 8-wide integer compare.
@@ -485,7 +486,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v32i8, 4 },
};
- static const CostTblEntry<MVT> AVX2CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
{ ISD::SETCC, MVT::v4i64, 1 },
{ ISD::SETCC, MVT::v8i32, 1 },
{ ISD::SETCC, MVT::v16i16, 1 },
@@ -493,22 +494,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
};
if (ST->hasAVX2()) {
- int Idx = CostTableLookup<MVT>(AVX2CostTbl, array_lengthof(AVX2CostTbl),
- ISD, MTy);
+ int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
if (Idx != -1)
return LT.first * AVX2CostTbl[Idx].Cost;
}
if (ST->hasAVX()) {
- int Idx = CostTableLookup<MVT>(AVX1CostTbl, array_lengthof(AVX1CostTbl),
- ISD, MTy);
+ int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
if (Idx != -1)
return LT.first * AVX1CostTbl[Idx].Cost;
}
if (ST->hasSSE42()) {
- int Idx = CostTableLookup<MVT>(SSE42CostTbl, array_lengthof(SSE42CostTbl),
- ISD, MTy);
+ int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
if (Idx != -1)
return LT.first * SSE42CostTbl[Idx].Cost;
}
@@ -613,3 +611,84 @@ unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return TargetTransformInfo::getAddressComputationCost(Ty, IsComplex);
}
+
+unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
+ bool IsPairwise) const {
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ MVT MTy = LT.second;
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // We use the Intel Architecture Code Analyzer (IACA) to measure the throughput
+ // and use it as the cost.
+
+ static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
+ { ISD::FADD, MVT::v2f64, 2 },
+ { ISD::FADD, MVT::v4f32, 4 },
+ { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
+ { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
+ { ISD::ADD, MVT::v8i16, 5 },
+ };
+
+ static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
+ { ISD::FADD, MVT::v4f32, 4 },
+ { ISD::FADD, MVT::v4f64, 5 },
+ { ISD::FADD, MVT::v8f32, 7 },
+ { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
+ { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
+ { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
+ { ISD::ADD, MVT::v8i16, 5 },
+ { ISD::ADD, MVT::v8i32, 5 },
+ };
+
+ static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
+ { ISD::FADD, MVT::v2f64, 2 },
+ { ISD::FADD, MVT::v4f32, 4 },
+ { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
+ { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
+ { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
+ };
+
+ static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
+ { ISD::FADD, MVT::v4f32, 3 },
+ { ISD::FADD, MVT::v4f64, 3 },
+ { ISD::FADD, MVT::v8f32, 4 },
+ { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
+ { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
+ { ISD::ADD, MVT::v4i64, 3 },
+ { ISD::ADD, MVT::v8i16, 4 },
+ { ISD::ADD, MVT::v8i32, 5 },
+ };
+
+ if (IsPairwise) {
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTblPairWise[Idx].Cost;
+ }
+
+ if (ST->hasSSE42()) {
+ int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTblPairWise[Idx].Cost;
+ }
+ } else {
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
+ }
+
+ if (ST->hasSSE42()) {
+ int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
+ }
+ }
+
+ return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
+}
+
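As a usage sketch of the new hook (assumptions: TTI is a TargetTransformInfo reference obtained from the pass manager and Ctx is an LLVMContext; this mirrors how a vectorizer would be expected to consult it, and is not code from this patch):

  // Cost of reducing a <4 x i32> vector to a single i32 with integer adds.
  Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
  unsigned SplitCost    = TTI.getReductionCost(Instruction::Add, VecTy,
                                               /*IsPairwiseForm=*/false);
  unsigned PairwiseCost = TTI.getReductionCost(Instruction::Add, VecTy,
                                               /*IsPairwiseForm=*/true);
  // On a plain SSE4.2 subtarget both queries map to the v4i32 rows above
  // and return a cost of 3.
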
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 477f75a..66ae9c2 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -122,11 +122,11 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
}
static bool clobbersAllYmmRegs(const MachineOperand &MO) {
- for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) {
+ for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) {
if (!MO.clobbersPhysReg(reg))
return false;
}
- for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) {
+ for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) {
if (!MO.clobbersPhysReg(reg))
return false;
}
@@ -148,6 +148,25 @@ static bool hasYmmReg(MachineInstr *MI) {
return false;
}
+/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this
+/// instruction.
+static bool clobbersAnyYmmReg(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isRegMask())
+ continue;
+ for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) {
+ if (MO.clobbersPhysReg(reg))
+ return true;
+ }
+ for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) {
+ if (MO.clobbersPhysReg(reg))
+ return true;
+ }
+ }
+ return false;
+}
+
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
@@ -231,8 +250,9 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
bool BBHasCall = false;
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
- MachineInstr *MI = I;
DebugLoc dl = I->getDebugLoc();
+ MachineInstr *MI = I;
+
bool isControlFlow = MI->isCall() || MI->isReturn();
// Shortcut: don't need to check regular instructions in dirty state.
@@ -251,6 +271,14 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
if (!isControlFlow)
continue;
+ // If the call won't clobber any YMM register, skip it as well. This usually
+ // happens with helper function calls (such as '_chkstk', '_ftol2') where the
+ // standard calling convention is not used (RegMask is not used to mark the
+ // clobbered registers, and register usage (def/imp-def/use) is well-defined
+ // and explicitly specified).
+ if (MI->isCall() && !clobbersAnyYmmReg(MI))
+ continue;
+
BBHasCall = true;
// The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 85d2a1d..3fa3b34 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_target(XCoreCodeGen
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
XCoreTargetObjectFile.cpp
+ XCoreTargetTransformInfo.cpp
XCoreSelectionDAGInfo.cpp
)
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 6f44551..3d1c474 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -23,10 +23,14 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) {
PrivateGlobalPrefix = ".L";
AscizDirective = ".asciiz";
- WeakDefDirective = "\t.weak\t";
- WeakRefDirective = "\t.weak\t";
+
+ HiddenVisibilityAttr = MCSA_Invalid;
+ HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+ ProtectedVisibilityAttr = MCSA_Invalid;
// Debug
HasLEB128 = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ DwarfRegNumForCFI = true;
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index b5a9660..e53c96b 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -14,13 +14,13 @@
#ifndef XCORETARGETASMINFO_H
#define XCORETARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
class Target;
- class XCoreMCAsmInfo : public MCAsmInfo {
+ class XCoreMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit XCoreMCAsmInfo(StringRef TT);
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index 2f375fc..73c310b 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -31,6 +31,8 @@ namespace llvm {
CodeGenOpt::Level OptLevel);
ModulePass *createXCoreLowerThreadLocalPass();
+ ImmutablePass *createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM);
+
} // end namespace llvm;
#endif
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 35ba299..c03dfe6 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -36,6 +36,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -88,14 +89,12 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
MCSymbol *SymGlob = OutContext.GetOrCreateSymbol(
Twine(Sym->getName() + StringRef(".globound")));
OutStreamer.EmitSymbolAttribute(SymGlob, MCSA_Global);
-
- OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName()) +
- ".globound," + Twine(ATy->getNumElements()));
-
+ OutStreamer.EmitAssignment(SymGlob,
+ MCConstantExpr::Create(ATy->getNumElements(),
+ OutContext));
if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
// TODO Use COMDAT groups for LinkOnceLinkage
- OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+
- ".globound");
+ OutStreamer.EmitSymbolAttribute(SymGlob, MCSA_Weak);
}
}
}
@@ -110,7 +109,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
- MCSymbol *GVSym = Mang->getSymbol(GV);
+ MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
@@ -217,7 +216,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << *MO.getMBB()->getSymbol();
break;
case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
break;
case MachineOperand::MO_ExternalSymbol:
O << MO.getSymbolName();
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index b57cf9d..c34b35c 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -30,10 +30,6 @@
using namespace llvm;
// helper functions. FIXME: Eliminate.
-static inline bool isImmUs(unsigned val) {
- return val <= 11;
-}
-
static inline bool isImmU6(unsigned val) {
return val < (1 << 6);
}
@@ -92,11 +88,16 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = &MF.getMMI();
+ const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ if (MFI->getMaxAlignment() > getStackAlignment())
+ report_fatal_error("emitPrologue unsupported alignment: "
+ + Twine(MFI->getMaxAlignment()));
+
bool FP = hasFP(MF);
const AttributeSet &PAL = MF.getFunction()->getAttributes();
@@ -119,21 +120,28 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
bool saveLR = XFI->getUsesLR();
// Do we need to allocate space on the stack?
if (FrameSize) {
+ bool LRSavedOnEntry = false;
int Opcode;
if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
MBB.addLiveIn(XCore::LR);
saveLR = false;
+ LRSavedOnEntry = true;
} else {
Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
}
BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
if (emitFrameMoves) {
-
// Show update of SP.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ MMI->addFrameInst(MCCFIInstruction::createDefCfaOffset(FrameLabel,
+ -FrameSize*4));
+ if (LRSavedOnEntry) {
+ unsigned Reg = MRI->getDwarfRegNum(XCore::LR, true);
+ MMI->addFrameInst(MCCFIInstruction::createOffset(FrameLabel, Reg, 0));
+ }
}
}
if (saveLR) {
@@ -144,6 +152,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
+ unsigned Reg = MRI->getDwarfRegNum(XCore::LR, true);
+ MMI->addFrameInst(MCCFIInstruction::createOffset(SaveLRLabel, Reg,
+ LRSpillOffset));
}
}
@@ -156,15 +167,34 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
+ unsigned Reg = MRI->getDwarfRegNum(XCore::R10, true);
+ MMI->addFrameInst(MCCFIInstruction::createOffset(SaveR10Label, Reg,
+ FPSpillOffset));
}
// Set the FP from the SP.
unsigned FramePtr = XCore::R10;
- BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
- .addImm(0);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr).addImm(0);
if (emitFrameMoves) {
// Show FP is now valid.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ unsigned Reg = MRI->getDwarfRegNum(FramePtr, true);
+ MMI->addFrameInst(MCCFIInstruction::createDefCfaRegister(FrameLabel,
+ Reg));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ MCSymbol *SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = MRI->getDwarfRegNum(CSI.getReg(), true);
+ MMI->addFrameInst(MCCFIInstruction::createOffset(SpillLabel, Reg,
+ Offset));
}
}
}
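With the prologue now describing its own stack adjustments, unwind information no longer relies on hand-maintained frame-move tables: each temporary label is paired with an MCCFIInstruction recorded through MachineModuleInfo. A hedged sketch of that idiom, assuming the usual CodeGen headers; the parameter names are illustrative, not the committed interface.

// Sketch only: emit a label at the point the stack pointer moves, then
// record the new CFA offset and a callee-saved register spill.
static void recordPrologueCFI(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, DebugLoc dl,
                              const TargetInstrInfo &TII, unsigned LabelOpc,
                              int FrameBytes, unsigned DwarfReg, int SpillOff) {
  MachineModuleInfo *MMI = &MF.getMMI();
  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
  BuildMI(MBB, MBBI, dl, TII.get(LabelOpc)).addSym(Label);
  // Record the CFA offset after the stack adjustment (mirrors -FrameSize*4
  // in the prologue above).
  MMI->addFrameInst(MCCFIInstruction::createDefCfaOffset(Label, -FrameBytes));
  // Record where DwarfReg was spilled.
  MMI->addFrameInst(MCCFIInstruction::createOffset(Label, DwarfReg, SpillOff));
}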
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 6fc7eef5..89ad27d 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -59,6 +59,7 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::CRC8 : return "XCoreISD::CRC8";
case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
+ case XCoreISD::MEMBARRIER : return "XCoreISD::MEMBARRIER";
default : return NULL;
}
}
@@ -79,7 +80,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setStackPointerRegisterToSaveRestore(XCore::SP);
- setSchedulingPreference(Sched::RegPressure);
+ setSchedulingPreference(Sched::Source);
// Use i32 for setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
@@ -148,6 +149,13 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ // Exception handling
+ setExceptionPointerRegister(XCore::R0);
+ setExceptionSelectorRegister(XCore::R1);
+
+ // Atomic operations
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
// TRAMPOLINE is custom lowered.
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
@@ -166,6 +174,24 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setMinFunctionAlignment(1);
}
+bool XCoreTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ EVT VT1 = Val.getValueType();
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i8:
+ return true;
+ }
+
+ return false;
+}
+
SDValue XCoreTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode())
@@ -188,6 +214,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -259,11 +286,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
return GA;
}
-static inline SDValue BuildGetId(SelectionDAG &DAG, SDLoc dl) {
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
- DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
-}
-
SDValue XCoreTargetLowering::
LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
{
@@ -834,6 +856,12 @@ LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+SDValue XCoreTargetLowering::
+LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(XCoreISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1200,7 +1228,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
ArgDI != ArgDE; ++ArgDI) {
if (ArgDI->Flags.isByVal() && ArgDI->Flags.getByValSize()) {
unsigned Size = ArgDI->Flags.getByValSize();
- unsigned Align = ArgDI->Flags.getByValAlign();
+ unsigned Align = std::max(StackSlotSize, ArgDI->Flags.getByValAlign());
// Create a new object on the stack and copy the pointee into it.
int FI = MFI->CreateStackObject(Size, Align, false, false);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 7761b7c..bc08497 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -70,7 +70,10 @@ namespace llvm {
BR_JT,
// Jumptable branch using long branches for each entry.
- BR_JT32
+ BR_JT32,
+
+ // Memory barrier.
+ MEMBARRIER
};
}
@@ -83,6 +86,10 @@ namespace llvm {
explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+ using TargetLowering::isZExtFree;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
+
virtual unsigned getJumpTableEncoding() const;
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
@@ -154,6 +161,7 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index d6b8c2d..33c7f31 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -22,7 +22,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "XCoreGenInstrInfo.inc"
namespace llvm {
@@ -39,6 +39,10 @@ namespace XCore {
using namespace llvm;
+
+// Pin the vtable to this file.
+void XCoreInstrInfo::anchor() {}
+
XCoreInstrInfo::XCoreInstrInfo()
: XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
RI() {
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 51d66a1..4429b07 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -24,6 +24,7 @@ namespace llvm {
class XCoreInstrInfo : public XCoreGenInstrInfo {
const XCoreRegisterInfo RI;
+ virtual void anchor();
public:
XCoreInstrInfo();
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 81fa84d..934a707 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -70,6 +70,11 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def SDT_XCoreMEMBARRIER : SDTypeProfile<0, 0, []>;
+
+def XCoreMemBarrier : SDNode<"XCoreISD::MEMBARRIER", SDT_XCoreMEMBARRIER,
+ [SDNPHasChain]>;
+
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
@@ -343,6 +348,10 @@ let usesCustomInserter = 1 in {
(select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
}
+let hasSideEffects = 1 in
+def Int_MemBarrier : PseudoInstXCore<(outs), (ins), "#MEMBARRIER",
+ [(XCoreMemBarrier)]>;
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 2e328b4..afce753 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -22,6 +22,9 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#define DEBUG_TYPE "xcore-lower-thread-local"
@@ -71,13 +74,104 @@ createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) {
return ConstantArray::get(NewType, Elements);
}
-static bool hasNonInstructionUse(GlobalVariable *GV) {
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
- ++UI)
- if (!isa<Instruction>(*UI))
- return true;
+static Instruction *
+createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
+ IRBuilder<true,NoFolder> Builder(Instr);
+ unsigned OpCode = CE->getOpcode();
+ switch (OpCode) {
+ case Instruction::GetElementPtr: {
+ SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end());
+ ArrayRef<Value *> CEOps(CEOpVec);
+ return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0],
+ CEOps.slice(1)));
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return dyn_cast<Instruction>(
+ Builder.CreateBinOp((Instruction::BinaryOps)OpCode,
+ CE->getOperand(0), CE->getOperand(1),
+ CE->getName()));
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return dyn_cast<Instruction>(
+ Builder.CreateCast((Instruction::CastOps)OpCode,
+ CE->getOperand(0), CE->getType(),
+ CE->getName()));
+ default:
+ llvm_unreachable("Unhandled constant expression!\n");
+ }
+}
+
+static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
+ do {
+ SmallVector<WeakVH,8> WUsers;
+ for (Value::use_iterator I = CE->use_begin(), E = CE->use_end();
+ I != E; ++I)
+ WUsers.push_back(WeakVH(*I));
+ std::sort(WUsers.begin(), WUsers.end());
+ WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end());
+ while (!WUsers.empty())
+ if (WeakVH WU = WUsers.pop_back_val()) {
+ if (PHINode *PN = dyn_cast<PHINode>(WU)) {
+ for (int I = 0, E = PN->getNumIncomingValues(); I < E; ++I)
+ if (PN->getIncomingValue(I) == CE) {
+ BasicBlock *PredBB = PN->getIncomingBlock(I);
+ if (PredBB->getTerminator()->getNumSuccessors() > 1)
+ PredBB = SplitEdge(PredBB, PN->getParent(), P);
+ Instruction *InsertPos = PredBB->getTerminator();
+ Instruction *NewInst = createReplacementInstr(CE, InsertPos);
+ PN->setOperand(I, NewInst);
+ }
+ } else if (Instruction *Instr = dyn_cast<Instruction>(WU)) {
+ Instruction *NewInst = createReplacementInstr(CE, Instr);
+ Instr->replaceUsesOfWith(CE, NewInst);
+ } else {
+ ConstantExpr *CExpr = dyn_cast<ConstantExpr>(WU);
+ if (!CExpr || !replaceConstantExprOp(CExpr, P))
+ return false;
+ }
+ }
+ } while (CE->hasNUsesOrMore(1)); // We need to check because a recursive
+ // sibling may have used 'CE' when createReplacementInstr was called.
+ CE->destroyConstant();
+ return true;
+}
- return false;
+static bool rewriteNonInstructionUses(GlobalVariable *GV, Pass *P) {
+ SmallVector<WeakVH,8> WUsers;
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I)
+ if (!isa<Instruction>(*I))
+ WUsers.push_back(WeakVH(*I));
+ while (!WUsers.empty())
+ if (WeakVH WU = WUsers.pop_back_val()) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(WU);
+ if (!CE || !replaceConstantExprOp(CE, P))
+ return false;
+ }
+ return true;
}
static bool isZeroLengthArray(Type *Ty) {
@@ -92,14 +186,16 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
return false;
// Skip globals that we can't lower and leave it for the backend to error.
- if (hasNonInstructionUse(GV) ||
+ if (!rewriteNonInstructionUses(GV, this) ||
!GV->getType()->isSized() || isZeroLengthArray(GV->getType()))
return false;
// Create replacement global.
ArrayType *NewType = createLoweredType(GV->getType()->getElementType());
- Constant *NewInitializer = createLoweredInitializer(NewType,
- GV->getInitializer());
+ Constant *NewInitializer = 0;
+ if (GV->hasInitializer())
+ NewInitializer = createLoweredInitializer(NewType,
+ GV->getInitializer());
GlobalVariable *NewGV =
new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(),
NewInitializer, "", 0, GlobalVariable::NotThreadLocal,
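lowerGlobal can only rewrite instruction users of a thread-local global, so instead of bailing out on constant-expression users (the old hasNonInstructionUse check) the pass now materialises them as instructions first. A hedged sketch of the core step for a single GEP constant expression; other opcodes follow the switch in createReplacementInstr above, and the function name here is illustrative.

// Sketch only: turn one ConstantExpr operand of an instruction into a real
// instruction so later per-instruction rewrites can see it.
static void materializeGEPConstantExpr(ConstantExpr *CE, Instruction *UserInstr) {
  assert(CE->getOpcode() == Instruction::GetElementPtr && "GEPs only here");
  // NoFolder stops IRBuilder from folding the result straight back into a
  // constant expression, which would defeat the rewrite.
  IRBuilder<true, NoFolder> Builder(UserInstr);
  SmallVector<Value *, 4> Ops(CE->op_begin(), CE->op_end());
  Value *NewGEP = Builder.CreateInBoundsGEP(Ops[0],
                                            ArrayRef<Value *>(Ops).slice(1));
  UserInstr->replaceUsesOfWith(CE, NewGEP);
}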
diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp
index f96eda9..def2673 100644
--- a/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -43,7 +43,7 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Symbol = MO.getMBB()->getSymbol();
break;
case MachineOperand::MO_GlobalAddress:
- Symbol = Mang->getSymbol(MO.getGlobal());
+ Symbol = Printer.getSymbol(MO.getGlobal());
Offset += MO.getOffset();
break;
case MachineOperand::MO_BlockAddress:
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 3ef1520..9ae0b86 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -70,3 +70,11 @@ bool XCorePassConfig::addInstSelector() {
extern "C" void LLVMInitializeXCoreTarget() {
RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
}
+
+void XCoreTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our XCore pass. This
+ // allows the XCore pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createXCoreTargetTransformInfoPass(this));
+}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index eb9a1aa..a19a677 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -57,6 +57,8 @@ public:
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ virtual void addAnalysisPasses(PassManagerBase &PM);
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
new file mode 100644
index 0000000..cc165f7
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
@@ -0,0 +1,83 @@
+//===-- XCoreTargetTransformInfo.cpp - XCore specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// XCore target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcoretti"
+#include "XCore.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeXCoreTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class XCoreTTI : public ImmutablePass, public TargetTransformInfo {
+public:
+ XCoreTTI() : ImmutablePass(ID) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ XCoreTTI(const XCoreTargetMachine *TM)
+ : ImmutablePass(ID) {
+ initializeXCoreTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ unsigned getNumberOfRegisters(bool Vector) const {
+ if (Vector) {
+ return 0;
+ }
+ return 12;
+ }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(XCoreTTI, TargetTransformInfo, "xcoretti",
+ "XCore Target Transform Info", true, true, false)
+char XCoreTTI::ID = 0;
+
+
+ImmutablePass *
+llvm::createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM) {
+ return new XCoreTTI(TM);
+}
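The new file wires the XCore backend into the TargetTransformInfo analysis group, so client passes see target numbers instead of the generic defaults. A hedged sketch of the consuming side, with a purely illustrative pass name; the TargetTransformInfo calls themselves match this revision's API.

// Sketch only: how a pass added after addAnalysisPasses() queries TTI.
struct MyCostAwarePass : public FunctionPass {
  static char ID;
  MyCostAwarePass() : FunctionPass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetTransformInfo>();
    AU.setPreservesAll();
  }

  virtual bool runOnFunction(Function &F) {
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
    // On XCore this now reports 12 scalar registers and no vector registers,
    // per the getNumberOfRegisters override above.
    unsigned ScalarRegs = TTI.getNumberOfRegisters(/*Vector=*/false);
    (void)ScalarRegs;
    return false;
  }
};
char MyCostAwarePass::ID = 0;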
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index 9f2343b..9251783 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -52,7 +52,7 @@ namespace {
return false;
}
- // We don't modify the program, so we preserve all analyses
+ // We don't modify the program, so we preserve all analyses.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index c42d506..df08091 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -88,7 +88,7 @@ char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -504,7 +504,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// OriginalLoads - Keep track of a representative load instruction from the
// original function so that we can tell the alias analysis implementation
// what the new GEP/Load instructions we are inserting look like.
- std::map<IndicesVector, LoadInst*> OriginalLoads;
+ // We need to keep the original loads for each argument and the elements
+ // of the argument that are accessed.
+ std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
// Attribute - Keep track of the parameter attributes for the arguments
// that we are *not* promoting. For the ones that we do promote, the parameter
@@ -569,7 +571,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
else
// Take any load, we will use it only to update Alias Analysis
OrigLoad = cast<LoadInst>(User->use_back());
- OriginalLoads[Indices] = OrigLoad;
+ OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
}
// Add a parameter to the function for each element passed in.
@@ -676,7 +678,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[*SI];
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
if (!SI->empty()) {
Ops.reserve(SI->size());
Type *ElTy = V->getType();
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index a7bf188..d94c0f4 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -93,9 +93,12 @@ bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
}
unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+ unsigned Align = GV->getAlignment();
+ if (Align)
+ return Align;
if (TD)
return TD->getPreferredAlignment(GV);
- return GV->getAlignment();
+ return 0;
}
bool ConstantMerge::runOnModule(Module &M) {
@@ -210,9 +213,9 @@ bool ConstantMerge::runOnModule(Module &M) {
// Bump the alignment if necessary.
if (Replacements[i].first->getAlignment() ||
Replacements[i].second->getAlignment()) {
- Replacements[i].second->setAlignment(std::max(
- Replacements[i].first->getAlignment(),
- Replacements[i].second->getAlignment()));
+ Replacements[i].second->setAlignment(
+ std::max(getAlignment(Replacements[i].first),
+ getAlignment(Replacements[i].second)));
}
// Eliminate any uses of the dead global.
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 6ee6162..911c14e 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -357,6 +357,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
return false;
+ // If a function seen at compile time is not necessarily the one linked to
+ // the binary being built, it is illegal to change the actual arguments
+ // passed to it. These functions can be captured by isWeakForLinker().
+ // *NOTE* that mayBeOverridden() is insufficient for this purpose as it
+ // doesn't include linkage types like AvailableExternallyLinkage and
+ // LinkOnceODRLinkage. Take link_odr* as an example: it indicates a set of
+ // *EQUIVALENT* globals that can be merged at link-time. However, the
+ // semantic of *EQUIVALENT*-functions includes parameters. Changing
+ // parameters breaks this assumption.
+ //
+ if (Fn.isWeakForLinker())
+ return false;
+
if (Fn.use_empty())
return false;
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index fa3d72d..50fb3e6 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -21,6 +21,38 @@
#include <algorithm>
using namespace llvm;
+/// Make sure GV is visible from both modules. Delete is true if it is
+/// being deleted from this module.
+/// This also makes sure GV cannot be dropped so that references from
+/// the split module remain valid.
+static void makeVisible(GlobalValue &GV, bool Delete) {
+ bool Local = GV.hasLocalLinkage();
+ if (Local)
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+
+ if (Local || Delete) {
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ return;
+ }
+
+ if (!GV.hasLinkOnceLinkage()) {
+ assert(!GV.isDiscardableIfUnused());
+ return;
+ }
+
+ // Map linkonce* to weak* so that llvm doesn't drop this GV.
+ switch(GV.getLinkage()) {
+ default:
+ llvm_unreachable("Unexpected linkage");
+ case GlobalValue::LinkOnceAnyLinkage:
+ GV.setLinkage(GlobalValue::WeakAnyLinkage);
+ return;
+ case GlobalValue::LinkOnceODRLinkage:
+ GV.setLinkage(GlobalValue::WeakODRLinkage);
+ return;
+ }
+}
+
namespace {
/// @brief A pass to extract specific functions and their dependencies.
class GVExtractorPass : public ModulePass {
@@ -60,12 +92,7 @@ namespace {
continue;
}
- bool Local = I->isDiscardableIfUnused();
- if (Local)
- I->setVisibility(GlobalValue::HiddenVisibility);
-
- if (Local || Delete)
- I->setLinkage(GlobalValue::ExternalLinkage);
+ makeVisible(*I, Delete);
if (Delete)
I->setInitializer(0);
@@ -80,12 +107,7 @@ namespace {
continue;
}
- bool Local = I->isDiscardableIfUnused();
- if (Local)
- I->setVisibility(GlobalValue::HiddenVisibility);
-
- if (Local || Delete)
- I->setLinkage(GlobalValue::ExternalLinkage);
+ makeVisible(*I, Delete);
if (Delete)
I->deleteBody();
@@ -97,12 +119,10 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- if (CurI->isDiscardableIfUnused()) {
- CurI->setVisibility(GlobalValue::HiddenVisibility);
- CurI->setLinkage(GlobalValue::ExternalLinkage);
- }
+ bool Delete = deleteStuff == (bool)Named.count(CurI);
+ makeVisible(*CurI, Delete);
- if (deleteStuff == (bool)Named.count(CurI)) {
+ if (Delete) {
Type *Ty = CurI->getType()->getElementType();
CurI->removeFromParent();
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 1366883..60e5f06 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -136,7 +136,8 @@ namespace {
char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -366,6 +367,7 @@ namespace {
}
}
assert(Found && "Capturing call-site captured nothing?");
+ (void)Found;
return false;
}
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 201f320..901295d 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -179,6 +179,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// any globals used will be marked as needed.
Function *F = cast<Function>(G);
+ if (F->hasPrefixData())
+ MarkUsedGlobalsAsNeeded(F->getPrefixData());
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 64cd515..2ea89a1 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -37,7 +37,9 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
using namespace llvm;
@@ -60,7 +62,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
- struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfo>();
@@ -80,7 +81,6 @@ namespace {
bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
- const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
@@ -98,209 +98,6 @@ INITIALIZE_PASS_END(GlobalOpt, "globalopt",
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
-namespace {
-
-/// GlobalStatus - As we analyze each global, keep track of some information
-/// about it. If we find out that the address of the global is taken, none of
-/// this info will be accurate.
-struct GlobalStatus {
- /// isCompared - True if the global's address is used in a comparison.
- bool isCompared;
-
- /// isLoaded - True if the global is ever loaded. If the global isn't ever
- /// loaded it can be deleted.
- bool isLoaded;
-
- /// StoredType - Keep track of what stores to the global look like.
- ///
- enum StoredType {
- /// NotStored - There is no store to this global. It can thus be marked
- /// constant.
- NotStored,
-
- /// isInitializerStored - This global is stored to, but the only thing
- /// stored is the constant it was initialized with. This is only tracked
- /// for scalar globals.
- isInitializerStored,
-
- /// isStoredOnce - This global is stored to, but only its initializer and
- /// one other value is ever stored to it. If this global isStoredOnce, we
- /// track the value stored to it in StoredOnceValue below. This is only
- /// tracked for scalar globals.
- isStoredOnce,
-
- /// isStored - This global is stored to by multiple values or something else
- /// that we cannot track.
- isStored
- } StoredType;
-
- /// StoredOnceValue - If only one value (besides the initializer constant) is
- /// ever stored to this global, keep track of what value it is.
- Value *StoredOnceValue;
-
- /// AccessingFunction/HasMultipleAccessingFunctions - These start out
- /// null/false. When the first accessing function is noticed, it is recorded.
- /// When a second different accessing function is noticed,
- /// HasMultipleAccessingFunctions is set to true.
- const Function *AccessingFunction;
- bool HasMultipleAccessingFunctions;
-
- /// HasNonInstructionUser - Set to true if this global has a user that is not
- /// an instruction (e.g. a constant expr or GV initializer).
- bool HasNonInstructionUser;
-
- /// AtomicOrdering - Set to the strongest atomic ordering requirement.
- AtomicOrdering Ordering;
-
- GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
- StoredOnceValue(0), AccessingFunction(0),
- HasMultipleAccessingFunctions(false),
- HasNonInstructionUser(false), Ordering(NotAtomic) {}
-};
-
-}
-
-/// StrongerOrdering - Return the stronger of the two ordering. If the two
-/// orderings are acquire and release, then return AcquireRelease.
-///
-static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
- if (X == Acquire && Y == Release) return AcquireRelease;
- if (Y == Acquire && X == Release) return AcquireRelease;
- return (AtomicOrdering)std::max(X, Y);
-}
-
-/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
-/// by constants itself. Note that constants cannot be cyclic, so this test is
-/// pretty easy to implement recursively.
-///
-static bool SafeToDestroyConstant(const Constant *C) {
- if (isa<GlobalValue>(C)) return false;
-
- for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
- ++UI)
- if (const Constant *CU = dyn_cast<Constant>(*UI)) {
- if (!SafeToDestroyConstant(CU)) return false;
- } else
- return false;
- return true;
-}
-
-
-/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
-/// structure. If the global has its address taken, return true to indicate we
-/// can't do anything with it.
-///
-static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
- SmallPtrSet<const PHINode*, 16> &PHIUsers) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
- ++UI) {
- const User *U = *UI;
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
- GS.HasNonInstructionUser = true;
-
- // If the result of the constantexpr isn't pointer type, then we won't
- // know to expect it in various places. Just reject early.
- if (!isa<PointerType>(CE->getType())) return true;
-
- if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
- } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
- if (!GS.HasMultipleAccessingFunctions) {
- const Function *F = I->getParent()->getParent();
- if (GS.AccessingFunction == 0)
- GS.AccessingFunction = F;
- else if (GS.AccessingFunction != F)
- GS.HasMultipleAccessingFunctions = true;
- }
- if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
- GS.isLoaded = true;
- // Don't hack on volatile loads.
- if (LI->isVolatile()) return true;
- GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Don't allow a store OF the address, only stores TO the address.
- if (SI->getOperand(0) == V) return true;
-
- // Don't hack on volatile stores.
- if (SI->isVolatile()) return true;
-
- GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
-
- // If this is a direct store to the global (i.e., the global is a scalar
- // value, not an aggregate), keep more specific information about
- // stores.
- if (GS.StoredType != GlobalStatus::isStored) {
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
- SI->getOperand(1))) {
- Value *StoredVal = SI->getOperand(0);
-
- if (Constant *C = dyn_cast<Constant>(StoredVal)) {
- if (C->isThreadDependent()) {
- // The stored value changes between threads; don't track it.
- return true;
- }
- }
-
- if (StoredVal == GV->getInitializer()) {
- if (GS.StoredType < GlobalStatus::isInitializerStored)
- GS.StoredType = GlobalStatus::isInitializerStored;
- } else if (isa<LoadInst>(StoredVal) &&
- cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
- if (GS.StoredType < GlobalStatus::isInitializerStored)
- GS.StoredType = GlobalStatus::isInitializerStored;
- } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
- GS.StoredType = GlobalStatus::isStoredOnce;
- GS.StoredOnceValue = StoredVal;
- } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
- GS.StoredOnceValue == StoredVal) {
- // noop.
- } else {
- GS.StoredType = GlobalStatus::isStored;
- }
- } else {
- GS.StoredType = GlobalStatus::isStored;
- }
- }
- } else if (isa<BitCastInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<GetElementPtrInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<SelectInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
- // PHI nodes we can check just like select or GEP instructions, but we
- // have to be careful about infinite recursion.
- if (PHIUsers.insert(PN)) // Not already visited.
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<CmpInst>(I)) {
- GS.isCompared = true;
- } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
- if (MTI->isVolatile()) return true;
- if (MTI->getArgOperand(0) == V)
- GS.StoredType = GlobalStatus::isStored;
- if (MTI->getArgOperand(1) == V)
- GS.isLoaded = true;
- } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
- assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
- if (MSI->isVolatile()) return true;
- GS.StoredType = GlobalStatus::isStored;
- } else {
- return true; // Any other non-load instruction might take address!
- }
- } else if (const Constant *C = dyn_cast<Constant>(U)) {
- GS.HasNonInstructionUser = true;
- // We might have a dead and dangling constant hanging off of here.
- if (!SafeToDestroyConstant(C))
- return true;
- } else {
- GS.HasNonInstructionUser = true;
- // Otherwise must be some other user.
- return true;
- }
- }
-
- return false;
-}
-
/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
/// as a root? If so, we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
@@ -434,7 +231,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
Changed = true;
}
} else if (Constant *C = dyn_cast<Constant>(U)) {
- if (SafeToDestroyConstant(C)) {
+ if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
@@ -471,9 +268,17 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
DataLayout *TD, TargetLibraryInfo *TLI) {
bool Changed = false;
- SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+ // Note that we need to use a weak value handle for the worklist items. When
+ // we delete a constant array, we may also be holding a pointer to one of its
+ // elements (or an element of one of its elements if we're dealing with an
+ // array of arrays) in the worklist.
+ SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
while (!WorkList.empty()) {
- User *U = WorkList.pop_back_val();
+ Value *UV = WorkList.pop_back_val();
+ if (!UV)
+ continue;
+
+ User *U = cast<User>(UV);
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (Init) {
@@ -534,7 +339,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
} else if (Constant *C = dyn_cast<Constant>(U)) {
// If we have a chain of dead constantexprs or other things dangling from
// us, and if they are all dead, nuke them without remorse.
- if (SafeToDestroyConstant(C)) {
+ if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
CleanupConstantGlobalUsers(V, Init, TD, TLI);
return true;
@@ -549,7 +354,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
- return SafeToDestroyConstant(C);
+ return isSafeToDestroyConstant(C);
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
@@ -1373,8 +1178,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- StructType *ST =
- cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+ StructType *ST = cast<StructType>(PN->getType()->getPointerElementType());
PHINode *NewPN =
PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
@@ -1505,7 +1309,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, 0,
@@ -1735,7 +1539,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CI->getType());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
@@ -1917,13 +1721,12 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
if (!GV->hasLocalLinkage())
return false;
- SmallPtrSet<const PHINode*, 16> PHIUsers;
GlobalStatus GS;
- if (AnalyzeGlobal(GV, GS, PHIUsers))
+ if (GlobalStatus::analyzeGlobal(GV, GS))
return false;
- if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+ if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
GV->setUnnamedAddr(true);
NumUnnamed++;
}
@@ -1931,14 +1734,13 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
if (GV->isConstant() || !GV->hasInitializer())
return false;
- return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+ return ProcessInternalGlobal(GV, GVI, GS);
}
/// ProcessInternalGlobal - Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
- const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS) {
// If this is a first class global and has only one accessing function
// and this function is main (which we know is not recursive), we replace
@@ -1971,7 +1773,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
- if (!GS.isLoaded) {
+ if (!GS.IsLoaded) {
DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
bool Changed;
@@ -1992,7 +1794,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
}
return Changed;
- } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
GV->setConstant(true);
@@ -2015,7 +1817,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
}
- } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ } else if (GS.StoredType == GlobalStatus::StoredOnce) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
@@ -2048,11 +1850,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
- ++NumShrunkToBool;
- return true;
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
+ if (GS.Ordering == NotAtomic) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
}
+ }
}
return false;
@@ -2210,8 +2015,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
CSVals[1] = 0;
StructType *StructTy =
- cast <StructType>(
- cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
+ cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
// Create the new init list.
std::vector<Constant*> CAList;
@@ -3041,14 +2845,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
return true;
}
-static int compareNames(const void *A, const void *B) {
- const GlobalValue *VA = *reinterpret_cast<GlobalValue* const*>(A);
- const GlobalValue *VB = *reinterpret_cast<GlobalValue* const*>(B);
- if (VA->getName() < VB->getName())
- return -1;
- if (VB->getName() < VA->getName())
- return 1;
- return 0;
+static int compareNames(Constant *const *A, Constant *const *B) {
+ return (*A)->getName().compare((*B)->getName());
}
static void setUsedInitializer(GlobalVariable &V,
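The worklist in CleanupConstantGlobalUsers switches from plain User pointers to WeakVH because destroying one queued constant can recursively destroy other queued users; a weak handle simply goes null instead of dangling. A hedged sketch of that idiom in isolation; the function name and loop body are illustrative.

// Sketch only: a deletion-tolerant worklist over the users of V.
static void visitUsersSafely(Value *V) {
  SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
  while (!WorkList.empty()) {
    Value *UV = WorkList.pop_back_val();
    if (!UV)                       // Deleted while it sat on the worklist.
      continue;
    User *U = cast<User>(UV);
    if (Constant *C = dyn_cast<Constant>(U))
      if (isSafeToDestroyConstant(C))
        C->destroyConstant();      // May null out later WorkList entries.
  }
}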
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index a0095da..437597e 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -63,7 +63,7 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index a4f7026..57379a3 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
namespace {
-/// \brief Actaul inliner pass implementation.
+/// \brief Actual inliner pass implementation.
///
/// The common implementation of the inlining logic is shared between this
/// inliner pass and the always inliner pass. The two passes use different cost
@@ -61,7 +61,7 @@ public:
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index d56a06f..64e2ced 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -11,6 +11,12 @@
// If the function or variable is not in the list of external names given to
// the pass it is marked as internal.
//
+// This transformation would not be legal in a regular compilation, but it gets
+// extra information from the linker about what is safe.
+//
+// For example, internalizing a function with external linkage is only safe
+// when the linker tells us it is used solely from within this module.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "internalize"
@@ -23,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <fstream>
#include <set>
@@ -50,10 +57,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit InternalizePass();
- explicit InternalizePass(ArrayRef<const char *> exportList);
+ explicit InternalizePass(ArrayRef<const char *> ExportList);
void LoadFile(const char *Filename);
- void ClearExportList();
- void AddToExportList(const std::string &val);
virtual bool runOnModule(Module &M);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -72,15 +77,14 @@ InternalizePass::InternalizePass()
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
- if (!APIList.empty()) // If a list is specified, use it as well.
- ExternalNames.insert(APIList.begin(), APIList.end());
+ ExternalNames.insert(APIList.begin(), APIList.end());
}
-InternalizePass::InternalizePass(ArrayRef<const char *> exportList)
+InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
: ModulePass(ID){
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
- for(ArrayRef<const char *>::const_iterator itr = exportList.begin();
- itr != exportList.end(); itr++) {
+ for(ArrayRef<const char *>::const_iterator itr = ExportList.begin();
+ itr != ExportList.end(); itr++) {
ExternalNames.insert(*itr);
}
}
@@ -101,12 +105,25 @@ void InternalizePass::LoadFile(const char *Filename) {
}
}
-void InternalizePass::ClearExportList() {
- ExternalNames.clear();
-}
+static bool shouldInternalize(const GlobalValue &GV,
+ const std::set<std::string> &ExternalNames) {
+ // Function must be defined here
+ if (GV.isDeclaration())
+ return false;
+
+ // Available externally is really just a "declaration with a body".
+ if (GV.hasAvailableExternallyLinkage())
+ return false;
-void InternalizePass::AddToExportList(const std::string &val) {
- ExternalNames.insert(val);
+ // Already has internal linkage
+ if (GV.hasLocalLinkage())
+ return false;
+
+ // Marked to keep external?
+ if (ExternalNames.count(GV.getName()))
+ return false;
+
+ return true;
}
bool InternalizePass::runOnModule(Module &M) {
@@ -114,11 +131,6 @@ bool InternalizePass::runOnModule(Module &M) {
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
bool Changed = false;
- // Never internalize functions which code-gen might insert.
- // FIXME: We should probably add this (and the __stack_chk_guard) via some
- // type of call-back in CodeGen.
- ExternalNames.insert("__stack_chk_fail");
-
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, false);
@@ -139,19 +151,20 @@ bool InternalizePass::runOnModule(Module &M) {
// Mark all functions not in the api as internal.
// FIXME: maybe use private linkage?
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration() && // Function must be defined here
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !I->hasLocalLinkage() && // Can't already have internal linkage
- !ExternalNames.count(I->getName())) {// Not marked to keep external?
- I->setLinkage(GlobalValue::InternalLinkage);
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+
+ if (ExternalNode)
// Remove a callgraph edge from the external node to this function.
- if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
- Changed = true;
- ++NumFunctions;
- DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
- }
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+
+ Changed = true;
+ ++NumFunctions;
+ DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ }
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
@@ -166,35 +179,36 @@ bool InternalizePass::runOnModule(Module &M) {
ExternalNames.insert("llvm.global.annotations");
// Never internalize symbols code-gen inserts.
+ // FIXME: We should probably add this (and the __stack_chk_guard) via some
+ // type of call-back in CodeGen.
+ ExternalNames.insert("__stack_chk_fail");
ExternalNames.insert("__stack_chk_guard");
// Mark all global variables with initializers that are not in the api as
// internal as well.
// FIXME: maybe use private linkage?
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration() && !I->hasLocalLinkage() &&
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !ExternalNames.count(I->getName())) {
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
- ++NumGlobals;
- DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
- }
+ I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumGlobals;
+ DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
+ }
// Mark all aliases that are not in the api as internal as well.
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I)
- if (!I->isDeclaration() && !I->hasInternalLinkage() &&
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !ExternalNames.count(I->getName())) {
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
- ++NumAliases;
- DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
- }
+ I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumAliases;
+ DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
+ }
return Changed;
}
@@ -203,6 +217,6 @@ ModulePass *llvm::createInternalizePass() {
return new InternalizePass();
}
-ModulePass *llvm::createInternalizePass(ArrayRef<const char *> el) {
- return new InternalizePass(el);
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
+ return new InternalizePass(ExportList);
}
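With the ad-hoc ClearExportList/AddToExportList mutators gone, callers express the export set once, through the constructor. A hedged usage sketch of the renamed createInternalizePass(ExportList) overload from an LTO-style driver; the symbol names are illustrative.

// Sketch only: keep two entry points external, internalize the rest.
void addInternalization(PassManager &PM) {
  const char *KeepExternal[] = { "main", "my_plugin_entry" };
  PM.add(llvm::createInternalizePass(KeepExternal));
}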
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 4ce749c..3861421 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -210,16 +210,20 @@ private:
// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
+
+ PointerType *PTy1 = dyn_cast<PointerType>(Ty1);
+ PointerType *PTy2 = dyn_cast<PointerType>(Ty2);
+
+ if (TD) {
+ if (PTy1 && PTy1->getAddressSpace() == 0) Ty1 = TD->getIntPtrType(Ty1);
+ if (PTy2 && PTy2->getAddressSpace() == 0) Ty2 = TD->getIntPtrType(Ty2);
+ }
+
if (Ty1 == Ty2)
return true;
- if (Ty1->getTypeID() != Ty2->getTypeID()) {
- if (TD) {
- LLVMContext &Ctx = Ty1->getContext();
- if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
- if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
- }
+
+ if (Ty1->getTypeID() != Ty2->getTypeID())
return false;
- }
switch (Ty1->getTypeID()) {
default:
@@ -241,8 +245,7 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
return true;
case Type::PointerTyID: {
- PointerType *PTy1 = cast<PointerType>(Ty1);
- PointerType *PTy2 = cast<PointerType>(Ty2);
+ assert(PTy1 && PTy2 && "Both types must be pointers here.");
return PTy1->getAddressSpace() == PTy2->getAddressSpace();
}
@@ -352,14 +355,19 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
// Determine whether two GEP operations perform the same underlying arithmetic.
bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
const GEPOperator *GEP2) {
- // When we have target data, we can reduce the GEP down to the value in bytes
- // added to the address.
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 1;
- APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
- if (TD &&
- GEP1->accumulateConstantOffset(*TD, Offset1) &&
- GEP2->accumulateConstantOffset(*TD, Offset2)) {
- return Offset1 == Offset2;
+ unsigned AS = GEP1->getPointerAddressSpace();
+ if (AS != GEP2->getPointerAddressSpace())
+ return false;
+
+ if (TD) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 1;
+ APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
+ if (GEP1->accumulateConstantOffset(*TD, Offset1) &&
+ GEP2->accumulateConstantOffset(*TD, Offset2)) {
+ return Offset1 == Offset2;
+ }
}
if (GEP1->getPointerOperand()->getType() !=
@@ -713,6 +721,19 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
writeThunk(F, G);
}
+// Helper for writeThunk,
+// Selects proper bitcast operation,
+// but a bit simpler than CastInst::getCastOpcode.
+static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ return Builder.CreateIntToPtr(V, DestTy);
+ else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ return Builder.CreatePtrToInt(V, DestTy);
+ else
+ return Builder.CreateBitCast(V, DestTy);
+}
+
// Replace G with a simple tail call to bitcast(F). Also replace direct uses
// of G with bitcast(F). Deletes G.
void MergeFunctions::writeThunk(Function *F, Function *G) {
@@ -738,7 +759,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
- Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
+ Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i)));
++i;
}
@@ -748,13 +769,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
- Type *RetTy = NewG->getReturnType();
- if (CI->getType()->isIntegerTy() && RetTy->isPointerTy())
- Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy));
- else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy())
- Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy));
- else
- Builder.CreateRet(Builder.CreateBitCast(CI, RetTy));
+ Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
}
NewG->copyAttributesFrom(G);
@@ -829,6 +844,18 @@ bool MergeFunctions::insert(ComparableFunction &NewF) {
const ComparableFunction &OldF = *Result.first;
+ // Don't merge tiny functions, since it can just end up making the function
+ // larger.
+ // FIXME: Should still merge them if they are unnamed_addr and produce an
+ // alias.
+ if (NewF.getFunc()->size() == 1) {
+ if (NewF.getFunc()->front().size() <= 2) {
+ DEBUG(dbgs() << NewF.getFunc()->getName()
+ << " is to small to bother merging\n");
+ return false;
+ }
+ }
+
// Never thunk a strong function to a weak function.
assert(!OldF.getFunc()->mayBeOverridden() ||
NewF.getFunc()->mayBeOverridden());
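The isEquivalentType change above canonicalises address-space-0 pointers to the module's intptr_t type before comparing, so functions that differ only in pointer-versus-intptr_t signatures can now be merged, with createCast bridging the thunk. A hedged, self-contained illustration of that canonicalisation; the DataLayout string is an assumption, not taken from the patch.

// Sketch only: with 64-bit pointers, i8* canonicalises to i64.
bool pointerMatchesIntPtr(LLVMContext &Ctx) {
  DataLayout TD("e-p:64:64");              // illustrative layout string
  Type *PtrTy = Type::getInt8PtrTy(Ctx);   // i8* in address space 0
  Type *IntTy = Type::getInt64Ty(Ctx);
  // The comparator rewrites PtrTy to TD.getIntPtrType(PtrTy) before the
  // equality check, which yields i64 here.
  return TD.getIntPtrType(PtrTy) == IntTy;
}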
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index a6b3f4e..24c5018 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -29,20 +29,20 @@
using namespace llvm;
static cl::opt<bool>
-RunLoopVectorization("vectorize-loops",
+RunLoopVectorization("vectorize-loops", cl::Hidden,
cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-LateVectorization("late-vectorize", cl::init(false), cl::Hidden,
+LateVectorization("late-vectorize", cl::init(true), cl::Hidden,
cl::desc("Run the vectorization pasess late in the pass "
"pipeline (after the inliner)"));
static cl::opt<bool>
-RunSLPVectorization("vectorize-slp",
+RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize-slp-aggressive",
+RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
@@ -54,6 +54,10 @@ static cl::opt<bool> UseNewSROA("use-new-sroa",
cl::init(true), cl::Hidden,
cl::desc("Enable the new, experimental SROA pass"));
+static cl::opt<bool>
+RunLoopRerolling("reroll-loops", cl::Hidden,
+ cl::desc("Run the loop rerolling pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -65,6 +69,7 @@ PassManagerBuilder::PassManagerBuilder() {
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
LateVectorize = LateVectorization;
+ RerollLoops = RunLoopRerolling;
}
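
As a usage sketch (assuming the LLVM 3.4-era PassManagerBuilder API this file exposes), a frontend can set these members directly instead of relying on the cl::opt defaults; RerollLoops is the member wired to the new -reroll-loops option above.

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

void buildPipeline(llvm::PassManager &MPM) {
  llvm::PassManagerBuilder PMB;
  PMB.OptLevel = 3;
  PMB.LoopVectorize = true;   // corresponds to -vectorize-loops
  PMB.SLPVectorize = true;    // corresponds to -vectorize-slp
  PMB.RerollLoops = false;    // corresponds to -reroll-loops (off by default)
  PMB.populateModulePassManager(MPM);
}
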
PassManagerBuilder::~PassManagerBuilder() {
@@ -195,8 +200,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (!LateVectorize && LoopVectorize && OptLevel > 1 && SizeLevel < 2)
- MPM.add(createLoopVectorizePass());
+ if (!LateVectorize && LoopVectorize)
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops));
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
@@ -216,22 +221,22 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
- if (!LateVectorize) {
- if (SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass()); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
- }
+ if (RerollLoops)
+ MPM.add(createLoopRerollPass());
+ if (SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass()); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
}
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
@@ -241,7 +246,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// As an experimental mode, run any vectorization passes in a separate
// pipeline from the CGSCC pass manager that runs iteratively with the
// inliner.
- if (LateVectorize) {
+ if (LateVectorize && LoopVectorize) {
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
@@ -249,35 +254,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// Add the various vectorization passes and relevant cleanup passes for
// them since we are no longer in the middle of the main scalar pipeline.
- if (LoopVectorize && OptLevel > 1 && SizeLevel < 2) {
- MPM.add(createLoopVectorizePass());
-
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass()); // Unroll small loops
-
- // FIXME: Is this necessary/useful? Should we also do SimplifyCFG?
- MPM.add(createInstructionCombiningPass());
- }
-
- if (SLPVectorize) {
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- // FIXME: Is this necessary/useful? Should we also do SimplifyCFG?
- MPM.add(createInstructionCombiningPass());
- }
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass()); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
- }
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops));
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createCFGSimplificationPass());
}
if (!DisableUnitAtATime) {
@@ -304,11 +283,8 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Now that composite has been compiled, scan through the module, looking
// for a main function. If main is defined, mark all other functions
// internal.
- if (Internalize) {
- std::vector<const char*> E;
- E.push_back("main");
- PM.add(createInternalizePass(E));
- }
+ if (Internalize)
+ PM.add(createInternalizePass("main"));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -349,6 +325,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
PM.add(createJumpThreadingPass());
+
// Break up allocas
if (UseNewSROA)
PM.add(createSROAPass());
@@ -362,6 +339,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createLICMPass()); // Hoist loop invariants.
PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 89529de..b160913 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -51,7 +51,7 @@ namespace {
char PruneEH::ID = 0;
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
"Remove unused exception handling info", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_END(PruneEH, "prune-eh",
"Remove unused exception handling info", false, false)
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 2791106..c4f5cfc 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -9,7 +9,7 @@
//
// The StripSymbols transformation implements code stripping. Specifically, it
// can delete:
-//
+//
// * names for virtual registers
// * symbols for internal globals and functions
// * debug information
@@ -39,7 +39,7 @@ namespace {
bool OnlyDebugInfo;
public:
static char ID; // Pass identification, replacement for typeid
- explicit StripSymbols(bool ODI = false)
+ explicit StripSymbols(bool ODI = false)
: ModulePass(ID), OnlyDebugInfo(ODI) {
initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
}
@@ -144,7 +144,7 @@ static void RemoveDeadConstant(Constant *C) {
assert(C->use_empty() && "Constant is not dead!");
SmallPtrSet<Constant*, 4> Operands;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- if (OnlyUsedBy(C->getOperand(i), C))
+ if (OnlyUsedBy(C->getOperand(i), C))
Operands.insert(cast<Constant>(C->getOperand(i)));
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
@@ -182,7 +182,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *STy = StructTypes[i];
if (STy->isLiteral() || STy->getName().empty()) continue;
-
+
if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
continue;
@@ -199,7 +199,7 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
- if (GlobalValue *GV =
+ if (GlobalValue *GV =
dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
UsedValues.insert(GV);
}
@@ -217,71 +217,20 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
}
-
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
}
-
+
// Remove all names from types.
StripTypeNames(M, PreserveDbgInfo);
return true;
}
-// StripDebugInfo - Strip debug info in the module if it exists.
-// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
-// llvm.dbg.region.end calls, and any globals they point to if now dead.
-static bool StripDebugInfo(Module &M) {
-
- bool Changed = false;
-
- // Remove all of the calls to the debugger intrinsics, and remove them from
- // the module.
- if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
- while (!Declare->use_empty()) {
- CallInst *CI = cast<CallInst>(Declare->use_back());
- CI->eraseFromParent();
- }
- Declare->eraseFromParent();
- Changed = true;
- }
-
- if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
- while (!DbgVal->use_empty()) {
- CallInst *CI = cast<CallInst>(DbgVal->use_back());
- CI->eraseFromParent();
- }
- DbgVal->eraseFromParent();
- Changed = true;
- }
-
- for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
- NME = M.named_metadata_end(); NMI != NME;) {
- NamedMDNode *NMD = NMI;
- ++NMI;
- if (NMD->getName().startswith("llvm.dbg.")) {
- NMD->eraseFromParent();
- Changed = true;
- }
- }
-
- for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
- for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
- ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
- ++BI) {
- if (!BI->getDebugLoc().isUnknown()) {
- Changed = true;
- BI->setDebugLoc(DebugLoc());
- }
- }
-
- return Changed;
-}
-
bool StripSymbols::runOnModule(Module &M) {
bool Changed = false;
Changed |= StripDebugInfo(M);
@@ -307,13 +256,13 @@ bool StripDebugDeclare::runOnModule(Module &M) {
assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
if (Arg1->use_empty()) {
- if (Constant *C = dyn_cast<Constant>(Arg1))
+ if (Constant *C = dyn_cast<Constant>(Arg1))
DeadConstants.push_back(C);
- else
+ else
RecursivelyDeleteTriviallyDeadInstructions(Arg1);
}
if (Arg2->use_empty())
- if (Constant *C = dyn_cast<Constant>(Arg2))
+ if (Constant *C = dyn_cast<Constant>(Arg2))
DeadConstants.push_back(C);
}
Declare->eraseFromParent();
@@ -332,76 +281,107 @@ bool StripDebugDeclare::runOnModule(Module &M) {
return true;
}
+/// Remove any debug info for global variables/functions in the given module for
+/// which said global variable/function no longer exists (i.e. is null).
+///
+/// Debugging information is encoded in LLVM IR using metadata. This is designed
+/// in such a way that debug info for symbols is preserved even if the symbols
+/// are optimized away by the optimizer. This special pass removes debug info
+/// for such symbols.
bool StripDeadDebugInfo::runOnModule(Module &M) {
bool Changed = false;
- // Debugging infomration is encoded in llvm IR using metadata. This is designed
- // such a way that debug info for symbols preserved even if symbols are
- // optimized away by the optimizer. This special pass removes debug info for
- // such symbols.
-
- // llvm.dbg.gv keeps track of debug info for global variables.
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
- SmallVector<MDNode *, 8> MDs;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- if (NMD->getOperand(i)) {
- assert(DIGlobalVariable(NMD->getOperand(i)).isGlobalVariable() &&
- "A MDNode in llvm.dbg.gv should be a DIGlobalVariable.");
- MDs.push_back(NMD->getOperand(i));
- }
- else
- Changed = true;
- NMD->eraseFromParent();
- NMD = NULL;
-
- for (SmallVectorImpl<MDNode *>::iterator I = MDs.begin(),
- E = MDs.end(); I != E; ++I) {
- GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
- if (GV && M.getGlobalVariable(GV->getName(), true)) {
- if (!NMD)
- NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
- NMD->addOperand(*I);
- }
+ LLVMContext &C = M.getContext();
+
+ // Gather all debug info in the module into the finder F. This is actually
+ // overkill in terms of what we want to do, but we want to try and be as
+ // resilient as possible in the face of potential debug info changes by using
+ // the formal interfaces given to us as much as possible.
+ DebugInfoFinder F;
+ F.processModule(M);
+
+ // For each compile unit, find the live set of global variables/functions and
+ // replace the current list of potentially dead global variables/functions
+ // with the live list.
+ SmallVector<Value *, 64> LiveGlobalVariables;
+ SmallVector<Value *, 64> LiveSubprograms;
+ DenseSet<const MDNode *> VisitedSet;
+
+ for (DebugInfoFinder::iterator CI = F.compile_unit_begin(),
+ CE = F.compile_unit_end(); CI != CE; ++CI) {
+ // Create our compile unit.
+ DICompileUnit DIC(*CI);
+ assert(DIC.Verify() && "DIC must verify as a DICompileUnit.");
+
+ // Create our live subprogram list.
+ DIArray SPs = DIC.getSubprograms();
+ bool SubprogramChange = false;
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram DISP(SPs.getElement(i));
+ assert(DISP.Verify() && "DISP must verify as a DISubprogram.");
+
+ // Make sure we visit each subprogram only once.
+ if (!VisitedSet.insert(DISP).second)
+ continue;
+
+ // If the function referenced by DISP is not null, the function is live.
+ if (DISP.getFunction())
+ LiveSubprograms.push_back(DISP);
else
- Changed = true;
+ SubprogramChange = true;
}
- }
- // llvm.dbg.sp keeps track of debug info for subprograms.
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
- SmallVector<MDNode *, 8> MDs;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- if (NMD->getOperand(i)) {
- assert(DISubprogram(NMD->getOperand(i)).isSubprogram() &&
- "A MDNode in llvm.dbg.sp should be a DISubprogram.");
- MDs.push_back(NMD->getOperand(i));
- }
+ // Create our live global variable list.
+ DIArray GVs = DIC.getGlobalVariables();
+ bool GlobalVariableChange = false;
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(GVs.getElement(i));
+ assert(DIG.Verify() && "DIG must verify as DIGlobalVariable.");
+
+ // Make sure we visit each global variable only once.
+ if (!VisitedSet.insert(DIG).second)
+ continue;
+
+ // If the global variable referenced by DIG is not null, the global
+ // variable is live.
+ if (DIG.getGlobal())
+ LiveGlobalVariables.push_back(DIG);
else
- Changed = true;
- NMD->eraseFromParent();
- NMD = NULL;
-
- for (SmallVectorImpl<MDNode *>::iterator I = MDs.begin(),
- E = MDs.end(); I != E; ++I) {
- bool FnIsLive = false;
- if (Function *F = DISubprogram(*I).getFunction())
- if (M.getFunction(F->getName()))
- FnIsLive = true;
- if (FnIsLive) {
- if (!NMD)
- NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
- NMD->addOperand(*I);
- } else {
- // Remove llvm.dbg.lv.fnname named mdnode which may have been used
- // to hold debug info for dead function's local variables.
- StringRef FName = DISubprogram(*I).getLinkageName();
- if (FName.empty())
- FName = DISubprogram(*I).getName();
- if (NamedMDNode *LVNMD = M.getNamedMetadata(
- "llvm.dbg.lv." + Function::getRealLinkageName(FName)))
- LVNMD->eraseFromParent();
- }
+ GlobalVariableChange = true;
+ }
+
+ // If we found dead subprograms or global variables, replace the current
+ // subprogram list/global variable list with our new live subprogram/global
+ // variable list.
+ if (SubprogramChange) {
+ // Make sure that 9 is still the index of the subprogram array, so that an
+ // assert fires if its location within the DICompileUnit ever changes and
+ // this code needs to be updated.
+ assert(DIC->getNumOperands() >= 10 &&
+ SPs == DIC->getOperand(9) &&
+ "DICompileUnits is expected to store Subprograms in operand "
+ "9.");
+ DIC->replaceOperandWith(9, MDNode::get(C, LiveSubprograms));
+ Changed = true;
}
+
+ if (GlobalVariableChange) {
+ // Make sure that 10 is still the index of the global variable array, so
+ // that an assert fires if its location within the DICompileUnit ever
+ // changes and this code needs to be updated.
+ assert(DIC->getNumOperands() >= 11 &&
+ GVs == DIC->getOperand(10) &&
+ "DICompileUnits is expected to store Global Variables in operand "
+ "10.");
+ DIC->replaceOperandWith(10, MDNode::get(C, LiveGlobalVariables));
+ Changed = true;
+ }
+
+ // Reset lists for the next iteration.
+ LiveSubprograms.clear();
+ LiveGlobalVariables.clear();
}
return Changed;
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index b3084cc..a5eddc2 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -158,8 +158,8 @@ public:
ConstantInt *DivRHS);
Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
- Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
@@ -178,6 +178,7 @@ public:
Instruction *visitPtrToInt(PtrToIntInst &CI);
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI);
Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
@@ -212,8 +213,8 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
- Type *FindElementAtOffset(Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices);
+ Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
@@ -271,7 +272,7 @@ public:
if (&I == V)
V = UndefValue::get(I.getType());
- DEBUG(errs() << "IC: Replacing " << I << "\n"
+ DEBUG(dbgs() << "IC: Replacing " << I << "\n"
" with " << *V << '\n');
I.replaceAllUsesWith(V);
@@ -283,7 +284,7 @@ public:
// instruction. Instead, visit methods should return the value returned by
// this function.
Instruction *EraseInstFromFunction(Instruction &I) {
- DEBUG(errs() << "IC: ERASE " << I << '\n');
+ DEBUG(dbgs() << "IC: ERASE " << I << '\n');
assert(I.use_empty() && "Cannot erase instruction that is used!");
// Make sure that we reprocess all operands now that we reduced their
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b474bd8..88bb69b 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -488,6 +488,26 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
return result;
}
+/// Convert an analysis of a masked ICmp into its equivalent if all boolean
+/// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
+/// is adjacent to the corresponding normal flag (recording ==), this just
+/// involves swapping those bits over.
+static unsigned conjugateICmpMask(unsigned Mask) {
+ unsigned NewMask;
+ NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed))
+ << 1;
+
+ NewMask |=
+ (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed))
+ >> 1;
+
+ return NewMask;
+}
+
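
To see the bit-pair swap in isolation, the following standalone check (illustrative only; the flag values are hypothetical stand-ins mirroring the layout assumed by the shifts above, not taken from this file) shows that shifting the "==" group left by one and the "!=" group right by one exchanges the two senses and is its own inverse.

#include <cassert>
#include <cstdio>

// Hypothetical flag layout: each "Not" flag sits one bit above its positive twin.
enum : unsigned {
  AMask_AllOnes  = 1u << 0, AMask_NotAllOnes  = 1u << 1,
  BMask_AllOnes  = 1u << 2, BMask_NotAllOnes  = 1u << 3,
  Mask_AllZeroes = 1u << 4, Mask_NotAllZeroes = 1u << 5,
  AMask_Mixed    = 1u << 6, AMask_NotMixed    = 1u << 7,
  BMask_Mixed    = 1u << 8, BMask_NotMixed    = 1u << 9,
};

static unsigned conjugate(unsigned Mask) {
  unsigned Pos = Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeroes |
                         AMask_Mixed | BMask_Mixed);
  unsigned Neg = Mask & (AMask_NotAllOnes | BMask_NotAllOnes |
                         Mask_NotAllZeroes | AMask_NotMixed | BMask_NotMixed);
  return (Pos << 1) | (Neg >> 1);
}

int main() {
  unsigned M = Mask_AllZeroes | BMask_NotMixed;
  assert(conjugate(M) == (Mask_NotAllZeroes | BMask_Mixed));
  assert(conjugate(conjugate(M)) == M);  // swapping twice restores the mask
  std::puts("ok");
}
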
/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
/// if possible. The returned predicate is either == or !=. Returns false if
/// decomposition fails.
@@ -548,14 +568,22 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
L21 = L22 = L1 = 0;
} else {
// Look for ANDs in the LHS icmp.
- if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
- if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
- L21 = L22 = 0;
- } else {
- if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
- return 0;
- std::swap(L1, L2);
+ if (!L1->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
+ L11 = L12 = 0;
+ } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ // Any icmp can be viewed as being trivially masked; if it allows us to
+ // remove one, it's worth it.
+ L11 = L1;
+ L12 = Constant::getAllOnesValue(L1->getType());
+ }
+
+ if (!L2->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
L21 = L22 = 0;
+ } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
+ L21 = L2;
+ L22 = Constant::getAllOnesValue(L2->getType());
}
}
@@ -576,7 +604,14 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
return 0;
}
E = R2; R1 = 0; ok = true;
- } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ } else if (R1->getType()->isIntegerTy()) {
+ if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ // As before, model no mask as a trivial mask if it'll let us do an
+ // optimisation.
+ R11 = R1;
+ R12 = Constant::getAllOnesValue(R1->getType());
+ }
+
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
A = R11; D = R12; E = R2; ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
@@ -589,7 +624,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
return 0;
// Look for ANDs in on the right side of the RHS icmp.
- if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (!ok && R2->getType()->isIntegerTy()) {
+ if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ R11 = R2;
+ R12 = Constant::getAllOnesValue(R2->getType());
+ }
+
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
A = R11; D = R12; E = R1; ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
@@ -618,8 +658,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
/// foldLogOpOfMaskedICmps:
/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y)
-static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
- ICmpInst::Predicate NEWCC,
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy* Builder) {
Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -629,8 +668,24 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
"foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
- if (NEWCC == ICmpInst::ICMP_NE)
- mask >>= 1; // treat "Not"-states as normal states
+ // In full generality:
+ // (icmp (A & B) Op C) | (icmp (A & D) Op E)
+ // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
+ //
+ // If the latter can be converted into (icmp (A & X) Op Y) then the former is
+ // equivalent to (icmp (A & X) !Op Y).
+ //
+ // Therefore, we can pretend for the rest of this function that we're dealing
+ // with the conjunction, provided we flip the sense of any comparisons (both
+ // input and output).
+
+ // In most cases we're going to produce an EQ for the "&&" case.
+ ICmpInst::Predicate NEWCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ if (!IsAnd) {
+ // Convert the masking analysis into its equivalent with negated
+ // comparisons.
+ mask = conjugateICmpMask(mask);
+ }
if (mask & FoldMskICmp_Mask_AllZeroes) {
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
@@ -657,6 +712,40 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
Value* newAnd = Builder->CreateAnd(A, newAnd1);
return Builder->CreateICmp(NEWCC, newAnd, A);
}
+
+ // Remaining cases assume at least that B and D are constant, and depend on
+ // their actual values. This isn't strictly necessary, just a "handle the
+ // easy cases for now" decision.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+
+ if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) {
+ // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
+ // Only valid if one of the masks is a superset of the other (check "B&D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() & DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
+ if (mask & FoldMskICmp_AMask_NotAllOnes) {
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
+ // Only valid if one of the masks is a superset of the other (check "B|D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() | DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
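
A quick exhaustive check (illustrative only, not part of the patch) of the superset condition used above, with B = 0x0C chosen as a subset of D = 0x3C so that B & D == B and the left-hand comparison alone gives the answer.

#include <cassert>
#include <cstdio>

int main() {
  const unsigned B = 0x0C, D = 0x3C;  // B & D == B, i.e. B is a subset of D
  for (unsigned A = 0; A < 256; ++A) {
    bool KeepLHS1 = (((A & B) != 0) && ((A & D) != 0)) == ((A & B) != 0);
    bool KeepLHS2 = (((A & B) != B) && ((A & D) != D)) == ((A & B) != B);
    assert(KeepLHS1 && KeepLHS2);
  }
  std::puts("ok");
}
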
if (mask & FoldMskICmp_BMask_Mixed) {
// (icmp eq (A & B), C) & (icmp eq (A & D), E)
// We already know that B & C == C && D & E == E.
@@ -665,14 +754,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
// contradict, then we can transform to
// -> (icmp eq (A & (B|D)), (C|E))
// Currently, we only handle the case of B, C, D, and E being constant.
- ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (BCst == 0) return 0;
- ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (DCst == 0) return 0;
// we can't simply use C and E, because we might actually handle
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set
-
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
if (CCst == 0) return 0;
if (LHSCC != NEWCC)
@@ -715,7 +799,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
// handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder))
return V;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
@@ -849,10 +933,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
return RHS;
case ICmpInst::ICMP_NE:
+ // Special case to get the ordering right when the values wrap around
+ // zero.
+ if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue())
+ std::swap(LHSCst, RHSCst);
if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1),
+ Val->getName()+".cmp");
}
break; // (X != 13 & X != 15) -> no change
}
@@ -1454,10 +1543,60 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return 0;
}
+/// IsOneHotValue - Returns true for "one-hot" values (values where at most
+/// one bit can be set).
+static bool IsOneHotValue(Value *V) {
+ // Match 1<<K.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Shl) {
+ ConstantInt *One = dyn_cast<ConstantInt>(BO->getOperand(0));
+ return One && One->isOne();
+ }
+
+ // Check for power of two integer constants.
+ if (ConstantInt *K = dyn_cast<ConstantInt>(V))
+ return K->getValue().isPowerOf2();
+
+ return false;
+}
+
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
+ // if K1 and K2 are a one-bit mask.
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+
+ if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() &&
+ RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
+
+ BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0));
+ BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0));
+ if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() &&
+ LAnd->getOpcode() == Instruction::And &&
+ RAnd->getOpcode() == Instruction::And) {
+
+ Value *Mask = 0;
+ Value *Masked = 0;
+ if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
+ IsOneHotValue(LAnd->getOperand(1)) &&
+ IsOneHotValue(RAnd->getOperand(1))) {
+ Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
+ Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
+ } else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
+ IsOneHotValue(LAnd->getOperand(0)) &&
+ IsOneHotValue(RAnd->getOperand(0))) {
+ Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
+ Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
+ }
+
+ if (Masked)
+ return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask);
+ }
+ }
+
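
The new fold can be checked exhaustively for 8-bit values (illustrative only): whenever K1 and K2 are single-bit masks, testing either bit for zero is the same as asking whether A fails to contain both bits.

#include <cassert>
#include <cstdio>

int main() {
  for (unsigned i = 0; i < 8; ++i)
    for (unsigned j = 0; j < 8; ++j) {
      unsigned K1 = 1u << i, K2 = 1u << j;       // one-hot masks
      for (unsigned A = 0; A < 256; ++A) {
        bool Orig   = ((A & K1) == 0) || ((A & K2) == 0);
        bool Folded = (A & (K1 | K2)) != (K1 | K2);
        assert(Orig == Folded);
      }
    }
  std::puts("ok");
}
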
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (PredicatesFoldable(LHSCC, RHSCC)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
@@ -1474,13 +1613,10 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// handle (roughly):
// (icmp ne (A & B), C) | (icmp ne (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
return V;
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
- ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
-
if (LHS->hasOneUse() || RHS->hasOneUse()) {
// (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
// (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9f74fd6..0cd7b14 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -999,20 +999,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Check to see if we are changing the return type...
if (OldRetTy != NewRetTy) {
- if (Callee->isDeclaration() &&
- // Conversion is ok if changing from one pointer type to another or from
- // a pointer to an integer of the same size.
- !((OldRetTy->isPointerTy() || !TD ||
- OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
- (NewRetTy->isPointerTy() || !TD ||
- NewRetTy == TD->getIntPtrType(Caller->getContext()))))
- return false; // Cannot transform this return value.
+ if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
+ if (Callee->isDeclaration())
+ return false; // Cannot transform this return value.
- if (!Caller->use_empty() &&
- // void -> non-void is handled specially
- !NewRetTy->isVoidTy() &&
- !CastInst::isBitCastable(NewRetTy, OldRetTy))
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy())
return false; // Cannot transform this return value.
+ }
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
@@ -1045,9 +1040,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
- if (!CastInst::isBitCastable(ActTy, ParamTy)) {
+ if (!CastInst::isBitCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- }
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
hasAttributes(AttributeFuncs::
@@ -1063,21 +1057,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
- Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ Type *CurElTy = ActTy->getPointerElementType();
if (TD->getTypeAllocSize(CurElTy) !=
TD->getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
-
- // Converting from one pointer type to another or between a pointer and an
- // integer of the same size is safe even if we do not have a body.
- bool isConvertible = ActTy == ParamTy ||
- (TD && ((ParamTy->isPointerTy() ||
- ParamTy == TD->getIntPtrType(Caller->getContext())) &&
- (ActTy->isPointerTy() ||
- ActTy == TD->getIntPtrType(Caller->getContext()))));
- if (Callee->isDeclaration() && !isConvertible)
- return false;
}
if (Callee->isDeclaration()) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 361acdd..72377dc 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1229,6 +1229,19 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
}
+ // (fptrunc (select cond, R1, Cst)) -->
+ // (select cond, (fptrunc R1), (fptrunc Cst))
+ SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0));
+ if (SI &&
+ (isa<ConstantFP>(SI->getOperand(1)) ||
+ isa<ConstantFP>(SI->getOperand(2)))) {
+ Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
+ CI.getType());
+ Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
+ CI.getType());
+ return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc);
+ }
+
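
A source-level shape of the new select fold (illustrative, assuming one select arm is a floating-point constant): pushing the truncation into both arms lets the constant arm fold to a float immediately.

float pick(bool c, double x) {
  return (float)(c ? x : 1.0);   // becomes: c ? (float)x : 1.0f
}
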
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
if (II) {
switch (II->getIntrinsicID()) {
@@ -1249,9 +1262,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // Note that we restrict this transformation based on
+ // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because
+ // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the
+ // single-precision intrinsic can be expanded in the backend.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
- Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
+ (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) ||
+ Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
@@ -1262,11 +1280,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
- Callee->getAttributes(),
- Builder->getFloatTy(),
- Builder->getFloatTy(),
- NULL);
+ Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ?
+ Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) :
+ M->getOrInsertFunction("sqrtf", Callee->getAttributes(),
+ Builder->getFloatTy(), Builder->getFloatTy(),
+ NULL);
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
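
The source pattern this fold targets (illustrative only) is a double-precision square root of a widened float that is immediately truncated back; with sqrtf available, both the libm call and the llvm.sqrt intrinsic form can become a single-precision call.

#include <cmath>

float root(float x) {
  return (float)std::sqrt((double)x);   // can be folded to sqrtf(x)
}
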
@@ -1338,14 +1356,18 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
- if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
+
+ if (TD) {
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ TD->getPointerSizeInBits(AS)) {
+ Type *Ty = TD->getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
+ }
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1370,25 +1392,32 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return &CI;
}
+ if (!TD)
+ return commonCastTransforms(CI);
+
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ unsigned AS = GEP->getPointerAddressSpace();
+ unsigned OffsetBits = TD->getPointerSizeInBits(AS);
+ APInt Offset(OffsetBits, 0);
+ BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
+ if (GEP->hasOneUse() &&
+ BCI &&
GEP->accumulateConstantOffset(*TD, Offset)) {
// Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
- Type *GEPIdxTy =
- cast<PointerType>(OrigBase->getType())->getElementType();
+ Value *OrigBase = BCI->getOperand(0);
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) {
+ if (FindElementAtOffset(OrigBase->getType(),
+ Offset.getSExtValue(),
+ NewIndices)) {
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
+ Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
+ Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -1406,16 +1435,22 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
- return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
- }
+ if (!TD)
+ return commonPointerCastTransforms(CI);
+
+ Type *Ty = CI.getType();
+ unsigned AS = CI.getPointerAddressSpace();
+
+ if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS))
+ return commonPointerCastTransforms(CI);
- return commonPointerCastTransforms(CI);
+ Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS);
+ if (Ty->isVectorTy()) // Handle vectors of pointers.
+ PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy);
+ return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
/// OptimizeVectorResize - This input value (which is known to have vector type)
@@ -1488,12 +1523,17 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// insertions into the vector. See the example in the comment for
/// OptimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
+/// Shift is the number of bits between the lsb of V and the lsb of
+/// the vector.
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
-static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+static bool CollectInsertionElements(Value *V, unsigned Shift,
SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy) {
+ Type *VecEltTy, InstCombiner &IC) {
+ assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
+ "Shift should be a multiple of the element type size");
+
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
@@ -1505,8 +1545,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (C->isNullValue())
return true;
+ unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
+ if (IC.getDataLayout()->isBigEndian())
+ ElementIndex = Elements.size() - ElementIndex - 1;
+
// Fail if multiple elements are inserted into this slot.
- if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ if (Elements[ElementIndex] != 0)
return false;
Elements[ElementIndex] = V;
@@ -1522,7 +1566,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- ElementIndex, Elements, VecEltTy);
+ Shift, Elements, VecEltTy, IC);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1533,10 +1577,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned ShiftI = Shift+i*ElementSize;
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
- i*ElementSize));
+ ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
return false;
}
return true;
@@ -1549,29 +1594,28 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy) &&
- CollectInsertionElements(I->getOperand(1), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC) &&
+ CollectInsertionElements(I->getOperand(1), Shift,
+ Elements, VecEltTy, IC);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (CI == 0) return false;
- if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
- unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
- return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
- Elements, VecEltTy);
+ Shift += CI->getZExtValue();
+ if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
}
}
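
A standalone illustration (host code, not LLVM) of why the element index must be flipped on big-endian targets: the piece at bit offset 0 of the integer lands in element 0 of the vector on a little-endian layout but in the last element on a big-endian one.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint16_t Lo = 0x1111, Hi = 0x2222;
  uint32_t V = ((uint32_t)Hi << 16) | Lo;   // piece at Shift 0 is Lo, at Shift 16 is Hi
  uint16_t Elts[2];
  std::memcpy(Elts, &V, sizeof V);          // reinterpret the i32 as <2 x i16>
  if (Elts[0] == Lo)
    std::puts("little-endian: bit offset 0 maps to element 0");
  else
    std::puts("big-endian: bit offset 0 maps to the last element");
}
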
@@ -1594,12 +1638,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType()))
+ DestVecTy->getElementType(), IC))
return 0;
// If we succeeded, we know that all of the element are specified by Elements
@@ -1785,10 +1832,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
if (SVI->hasOneUse() && DestTy->isVectorTy() &&
- cast<VectorType>(DestTy)->getNumElements() ==
- SVI->getType()->getNumElements() &&
+ DestTy->getVectorNumElements() == SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ SVI->getOperand(0)->getType()->getVectorNumElements()) {
BitCastInst *Tmp;
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
@@ -1810,3 +1856,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}
+
+Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+ return commonCastTransforms(CI);
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c0225ae..9bb65ef 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -227,7 +227,8 @@ Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && TD == 0) return 0;
+ if (!GEP->isInBounds() && TD == 0)
+ return 0;
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
@@ -393,9 +394,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If the index is larger than the pointer size of the target, truncate the
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
- if (!GEP->isInBounds() &&
- Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
- Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+ if (!GEP->isInBounds()) {
+ Type *IntPtrTy = TD->getIntPtrType(GEP->getType());
+ unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
+ if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
+ Idx = Builder->CreateTrunc(Idx, IntPtrTy);
+ }
// If the comparison is only true for one or two elements, emit direct
// comparisons.
@@ -562,16 +566,18 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
}
}
+
+
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ Type *IntPtrTy = TD.getIntPtrType(GEP->getOperand(0)->getType());
+ unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
if (Offset == 0) {
// Cast to intptr_t in case a truncation occurs. If an extension is needed,
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
@@ -593,7 +599,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
return 0;
// Okay, we can do this evaluation. Start by converting the index to intptr.
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
@@ -737,10 +742,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred,
- Value *TheAdd) {
+ ICmpInst::Predicate Pred) {
// If we have X+0, exit early (simplifying logic below) and let it get folded
// elsewhere. icmp X+0, X -> icmp X, X
if (CI->isZero()) {
@@ -1194,11 +1198,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Type *AndTy = AndCST->getType(); // Type of the and.
// We can fold this as long as we can't shift unknown bits
- // into the mask. This can only happen with signed shift
- // rights, as they sign-extend.
+ // into the mask. This can happen with signed right
+ // shifts, as they sign-extend. With logical shifts,
+ // we must still make sure the comparison is not signed
+ // because we are effectively changing the
+ // position of the sign bit (PR17827).
+ // TODO: We can relax these constraints a bit more.
if (ShAmt) {
- bool CanFold = Shift->isLogicalShift();
- if (!CanFold) {
+ bool CanFold = false;
+ unsigned ShiftOpcode = Shift->getOpcode();
+ if (ShiftOpcode == Instruction::AShr) {
// To test for the bad case of the signed shr, see if any
// of the bits shifted in could be tested after the mask.
uint32_t TyBits = Ty->getPrimitiveSizeInBits();
@@ -1208,6 +1217,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
AndCST->getValue()) == 0)
CanFold = true;
+ } else if (ShiftOpcode == Instruction::Shl ||
+ ShiftOpcode == Instruction::LShr) {
+ CanFold = !ICI.isSigned();
}
if (CanFold) {
@@ -1781,8 +1793,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits() ==
- cast<IntegerType>(DestTy)->getBitWidth()) {
+ TD->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
@@ -2035,14 +2046,59 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
}
+/// \brief Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst
+/// should be swapped.
+/// The decision is based on how many times these two operands are reused
+/// as subtract operands and their positions in those instructions.
+/// The rationale is that several architectures use the same instruction for
+/// both subtract and cmp, so it is better if the order of those operands
+/// matches.
+/// \return true if Op0 and Op1 should be swapped.
+static bool swapMayExposeCSEOpportunities(const Value * Op0,
+ const Value * Op1) {
+ // Filter out pointer values as those cannot appear directly in a subtract.
+ // FIXME: we may want to go through inttoptrs or bitcasts.
+ if (Op0->getType()->isPointerTy())
+ return false;
+ // Count every use of both Op0 and Op1 in a subtract.
+ // Each time Op0 is the first operand, count -1: swapping is bad, the
+ // subtract already has the same layout as the compare.
+ // Each time Op0 is the second operand, count +1: swapping is good, the
+ // subtract has a different layout than the compare.
+ // At the end, if the benefit is greater than 0, Op0 should come second to
+ // expose more CSE opportunities.
+ int GlobalSwapBenefits = 0;
+ for (Value::const_use_iterator UI = Op0->use_begin(), UIEnd = Op0->use_end(); UI != UIEnd; ++UI) {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(*UI);
+ if (!BinOp || BinOp->getOpcode() != Instruction::Sub)
+ continue;
+ // If Op0 is the first argument, it is not beneficial to swap the
+ // arguments.
+ int LocalSwapBenefits = -1;
+ unsigned Op1Idx = 1;
+ if (BinOp->getOperand(Op1Idx) == Op0) {
+ Op1Idx = 0;
+ LocalSwapBenefits = 1;
+ }
+ if (BinOp->getOperand(Op1Idx) != Op1)
+ continue;
+ GlobalSwapBenefits += LocalSwapBenefits;
+ }
+ return GlobalSwapBenefits > 0;
+}
+
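
An illustrative source pattern (hypothetical) behind this heuristic: when the compare is rewritten to use the same operand order as an existing subtract, a target whose subtract also sets condition flags (ARM's SUBS, for instance) can serve both with one instruction.

int clampedDiff(int a, int b) {
  int d = b - a;        // sub b, a
  if (a < b)            // icmp slt a, b; swapping gives icmp sgt b, a,
    return d;           // matching the subtract's operand order
  return 0;
}
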
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ unsigned Op0Cplxity = getComplexity(Op0);
+ unsigned Op1Cplxity = getComplexity(Op1);
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
- if (getComplexity(Op0) < getComplexity(Op1)) {
+ if (Op0Cplxity < Op1Cplxity ||
+ (Op0Cplxity == Op1Cplxity &&
+ swapMayExposeCSEOpportunities(Op0, Op1))) {
I.swapOperands();
std::swap(Op0, Op1);
Changed = true;
@@ -2477,7 +2533,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getContext()) ==
+ TD->getIntPtrType(RHSC->getType()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
@@ -2900,6 +2956,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Builder->CreateTrunc(B, A->getType()));
}
+ // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
+ // For lshr and ashr pairs.
+ if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
+ (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
+ ? ICmpInst::ICMP_UGE
+ : ICmpInst::ICMP_ULT;
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
+ return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
+ }
+ }
+
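
The new equality fold can be validated exhaustively for 8-bit values (illustrative only): two values have equal right-shifted forms, logical or arithmetic, exactly when their XOR stays below the shifted-out range. The arithmetic-shift line relies on the usual implementation behavior of right-shifting negative ints.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned C = 1; C < 8; ++C)
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned B = 0; B < 256; ++B) {
        bool Folded = (A ^ B) < (1u << C);                    // (A^B) u< (1 << C)
        bool LShr   = (A >> C) == (B >> C);
        bool AShr   = ((int8_t)A >> C) == ((int8_t)B >> C);
        assert(LShr == Folded && AShr == Folded);
      }
  std::puts("ok");
}
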
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -2930,20 +3004,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Value *X; ConstantInt *Cst;
// icmp X+Cst, X
if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate());
// icmp X, X+Cst
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
}
return Changed ? &I : 0;
}
-
-
-
-
-
/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
///
Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e2d7966..4c861b3 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -154,7 +154,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
if (TD) {
- Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(AI.getType());
if (AI.getArraySize()->getType() != IntPtrTy) {
Value *V = Builder->CreateIntCast(AI.getArraySize(),
IntPtrTy, false);
@@ -180,12 +180,13 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Now that I is pointing to the first non-allocation-inst in the block,
// insert our getelementptr instruction...
//
- Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
- Value *Idx[2];
- Idx[0] = NullIdx;
- Idx[1] = NullIdx;
+ Type *IdxTy = TD
+ ? TD->getIntPtrType(AI.getType())
+ : Type::getInt64Ty(AI.getContext());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = { NullIdx, NullIdx };
Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
@@ -262,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
EraseInstFromFunction(*ToDelete[i]);
Constant *TheSrc = cast<Constant>(Copy->getSource());
- Instruction *NewI
- = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
- AI.getType()));
+ Constant *Cast
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
+ Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
EraseInstFromFunction(*Copy);
++NumGlobalCopies;
return NewI;
@@ -302,9 +303,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
if (Constant *CSrc = dyn_cast<Constant>(CastOp))
if (ASrcTy->getNumElements() != 0) {
- Value *Idxs[2];
- Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
- Idxs[1] = Idxs[0];
+ Type *IdxTy = TD
+ ? TD->getIntPtrType(SrcTy)
+ : Type::getInt64Ty(SrcTy->getContext());
+ Value *Idx = Constant::getNullValue(IdxTy);
+ Value *Idxs[2] = { Idx, Idx };
CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
SrcTy = cast<PointerType>(CastOp->getType());
SrcPTy = SrcTy->getElementType();
@@ -315,7 +318,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
- (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+ (SrcPTy->isPtrOrPtrVectorTy() ==
+ LI.getType()->isPtrOrPtrVectorTy()) &&
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cc6a301..a759548 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -374,9 +374,12 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
} else {
if (C0) {
// (C0 / X) * C => (C0 * C) / X
- ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
- if (isNormalFp(F))
- R = BinaryOperator::CreateFDiv(F, Opnd1);
+ if (FMulOrDiv->hasOneUse()) {
+ // It would otherwise introduce another div.
+ ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFDiv(F, Opnd1);
+ }
} else {
// (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
@@ -460,10 +463,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Swap && FAddSub->getOpcode() == Instruction::FSub)
std::swap(M0, M1);
- Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ?
- BinaryOperator::CreateFAdd(M0, M1) :
- BinaryOperator::CreateFSub(M0, M1);
- Instruction *RI = cast<Instruction>(R);
+ Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd)
+ ? BinaryOperator::CreateFAdd(M0, M1)
+ : BinaryOperator::CreateFSub(M0, M1);
RI->copyFastMathFlags(&I);
return RI;
}
@@ -490,13 +492,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// if pattern detected emit alternate sequence
if (OpX && OpY) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(Log2->getFastMathFlags());
Log2->setArgOperand(0, OpY);
Value *FMulVal = Builder->CreateFMul(OpX, Log2);
- Instruction *FMul = cast<Instruction>(FMulVal);
- FMul->copyFastMathFlags(Log2);
- Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
- FSub->copyFastMathFlags(Log2);
- return FSub;
+ Value *FSub = Builder->CreateFSub(FMulVal, OpX);
+ FSub->takeName(&I);
+ return ReplaceInstUsesWith(I, FSub);
}
}
@@ -506,6 +508,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
for (int i = 0; i < 2; i++) {
bool IgnoreZeroSign = I.hasNoSignedZeros();
if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(I.getFastMathFlags());
+
Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
@@ -516,13 +521,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Opnd0->hasOneUse()) {
// -X * Y => -(X*Y) (Promote negation as high as possible)
Value *T = Builder->CreateFMul(N0, Opnd1);
- cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
- Instruction *Neg = BinaryOperator::CreateFNeg(T);
- if (I.getFastMathFlags().any()) {
- cast<Instruction>(T)->copyFastMathFlags(&I);
- Neg->copyFastMathFlags(&I);
- }
- return Neg;
+ Value *Neg = Builder->CreateFNeg(T);
+ Neg->takeName(&I);
+ return ReplaceInstUsesWith(I, Neg);
}
}
@@ -545,13 +546,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Y = Opnd0_0;
if (Y) {
- Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
- T->copyFastMathFlags(&I);
- T->setDebugLoc(I.getDebugLoc());
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(I.getFastMathFlags());
+ Value *T = Builder->CreateFMul(Opnd1, Opnd1);
- Instruction *R = BinaryOperator::CreateFMul(T, Y);
- R->copyFastMathFlags(&I);
- return R;
+ Value *R = Builder->CreateFMul(T, Y);
+ R->takeName(&I);
+ return ReplaceInstUsesWith(I, R);
}
}
}
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index bd14e81..4c6d0c4 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -604,8 +604,6 @@ namespace llvm {
LHS.Width == RHS.Width;
}
};
- template <>
- struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
}
@@ -688,10 +686,10 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// extracted out of it. First, sort the users by their offset and size.
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
- DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
- );
+ DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ dbgs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';
+ );
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
@@ -772,7 +770,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
PredValues.clear();
- DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": "
<< *EltPHI << '\n');
ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
}
@@ -792,7 +790,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, TD))
+ if (Value *V = SimplifyInstruction(&PN, TD, TLI))
return ReplaceInstUsesWith(PN, V);
// If all PHI operands are the same operation, pull them through the PHI,
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 7581dbe..283bec2 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -367,7 +367,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
- if (!IC || !IC->isEquality())
+ if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
return 0;
Value *CmpLHS = IC->getOperand(0);
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a7bfe09..c831ddd 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -808,7 +808,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
- case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
return 0;
@@ -845,21 +844,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
- unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue();
- unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue();
+ const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
+ const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
+ if (!ShlOp1 || !ShrOp1)
+ return 0; // Noop.
+
+ Value *VarX = Shr->getOperand(0);
+ Type *Ty = VarX->getType();
+ unsigned BitWidth = Ty->getIntegerBitWidth();
+ if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
+ return 0; // Undef.
+
+ unsigned ShlAmt = ShlOp1.getZExtValue();
+ unsigned ShrAmt = ShrOp1.getZExtValue();
KnownOne.clearAllBits();
KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
KnownZero &= DemandedMask;
- if (ShlAmt == 0 || ShrAmt == 0)
- return 0;
-
- Value *VarX = Shr->getOperand(0);
- Type *Ty = VarX->getType();
-
- APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
- APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+ APInt BitMask1(APInt::getAllOnesValue(BitWidth));
+ APInt BitMask2(APInt::getAllOnesValue(BitWidth));
bool isLshr = (Shr->getOpcode() == Instruction::LShr);
BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index f3de6e2..1e72410 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -106,8 +106,8 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
// If we have a PHI node with a vector type that has only 2 uses: feed
-// itself and be an operand of extractelemnt at a constant location,
-// try to replace the PHI of the vector type with a PHI of a scalar type
+// itself and be an operand of extractelement at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
if (!PN->hasNUses(2))
@@ -282,6 +282,38 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (SI->hasOneUse()) {
+ // TODO: For a select on vectors, it might be useful to do this if it
+      // has multiple extractelement uses. For a vector select, that seems to
+ // fight the vectorizer.
+
+ // If we are extracting an element from a vector select or a select on
+      // vectors, create a select on the scalars extracted from the vector
+      // arguments.
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ Value *Cond = SI->getCondition();
+ if (Cond->getType()->isVectorTy()) {
+ Cond = Builder->CreateExtractElement(Cond,
+ EI.getIndexOperand(),
+ Cond->getName() + ".elt");
+ }
+
+ Value *V1Elem
+ = Builder->CreateExtractElement(TrueVal,
+ EI.getIndexOperand(),
+ TrueVal->getName() + ".elt");
+
+ Value *V2Elem
+ = Builder->CreateExtractElement(FalseVal,
+ EI.getIndexOperand(),
+ FalseVal->getName() + ".elt");
+ return SelectInst::Create(Cond,
+ V1Elem,
+ V2Elem,
+ SI->getName() + ".elt");
+ }
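In scalar terms, the transformation above says that taking lane i of select(C, T, F) is the same as selecting between lane i of T and lane i of F. A tiny illustration with ordinary arrays (not LLVM code; scalar-condition case only):

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> T{1, 2, 3, 4}, F{5, 6, 7, 8};
  bool C = true;                          // scalar select condition
  unsigned i = 2;                         // constant extraction index
  std::array<int, 4> Sel = C ? T : F;     // "select" over the whole vectors
  assert(Sel[i] == (C ? T[i] : F[i]));    // equals a select over the extracted lanes
  return 0;
}
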
}
}
return 0;
@@ -294,7 +326,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ unsigned NumElts = V->getType()->getVectorNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 19959c0..f84db27 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -37,7 +37,7 @@ public:
/// in it.
void Add(Instruction *I) {
if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
Worklist.push_back(I);
}
}
@@ -54,7 +54,7 @@ public:
assert(Worklist.empty() && "Worklist must be empty to add initial group");
Worklist.reserve(NumEntries+16);
WorklistMap.resize(NumEntries);
- DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
for (unsigned Idx = 0; NumEntries; --NumEntries) {
Instruction *I = List[NumEntries-1];
WorklistMap.insert(std::make_pair(I, Idx++));
@@ -74,8 +74,7 @@ public:
}
Instruction *RemoveOne() {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
+ Instruction *I = Worklist.pop_back_val();
WorklistMap.erase(I);
return I;
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index b34ae21..191a101 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -699,7 +699,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+      // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
+      // even if isNullValue() currently returns false.
+ Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (InC && !isa<ConstantExpr>(InC))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
else
InV = Builder->CreateSelect(PN->getIncomingValue(i),
@@ -755,19 +758,25 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
return ReplaceInstUsesWith(I, NewPN);
}
-/// FindElementAtOffset - Given a type and a constant offset, determine whether
-/// or not there is a sequence of GEP indices into the type that will land us at
-/// the specified offset. If so, fill them into NewIndices and return the
-/// resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
- if (!TD) return 0;
- if (!Ty->isSized()) return 0;
+/// FindElementAtOffset - Given a pointer type and a constant offset, determine
+/// whether or not there is a sequence of GEP indices into the pointed type that
+/// will land us at the specified offset. If so, fill them into NewIndices and
+/// return the resultant element type, otherwise return null.
+Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ assert(PtrTy->isPtrOrPtrVectorTy());
+
+ if (!TD)
+ return 0;
+
+ Type *Ty = PtrTy->getPointerElementType();
+ if (!Ty->isSized())
+ return 0;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
@@ -1176,6 +1185,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
}
+ // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y))
+ // The GEP pattern is emitted by the SCEV expander for certain kinds of
+ // pointer arithmetic.
+ if (TD && GEP.getNumIndices() == 1 &&
+ match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) {
+ unsigned AS = GEP.getPointerAddressSpace();
+ if (GEP.getType() == Builder->getInt8PtrTy(AS) &&
+ GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
+ TD->getPointerSizeInBits(AS)) {
+ Operator *Index = cast<Operator>(GEP.getOperand(1));
+ Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
+ Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+ return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+ }
+ }
+
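The canonicalization above is address arithmetic modulo the pointer width: adding the negated ptrtoint of Y to the ptrtoint of X is the same as subtracting it, so the GEP can be rewritten as an integer subtraction fed back through inttoptr. In plain integer terms (a sketch assuming a 64-bit pointer width; the addresses are made-up example values):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x7fffde0100f0ULL;   // ptrtoint of the i8* base
  uint64_t Y = 0x7fffde010020ULL;   // ptrtoint of the other pointer
  // "gep i8* X, -(ptrtoint Y)" computes X + (0 - Y) with unsigned wraparound,
  // which equals (ptrtoint X) - (ptrtoint Y).
  assert(X + (0 - Y) == X - Y);
  return 0;
}
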
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
@@ -1231,13 +1256,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
- Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ Type *ResElTy = PtrOp->getType()->getPointerElementType();
if (TD && SrcElTy->isArrayTy() &&
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Idx[1] = GEP.getOperand(1);
+ Type *IdxType = TD->getIntPtrType(GEP.getType());
+ Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
@@ -1261,7 +1285,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1287,8 +1311,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize =
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+ uint64_t ArrayEltSize
+ = TD->getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1296,7 +1320,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1304,9 +1328,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *Off[2];
- Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Off[1] = NewIdx;
+ Value *Off[2] = {
+ Constant::getNullValue(TD->getIntPtrType(GEP.getType())),
+ NewIdx
+ };
+
Value *NewGEP = GEP.isInBounds() && NSW ?
Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
@@ -1318,15 +1344,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
+ if (!TD)
+ return 0;
+
/// See if we can simplify:
/// X = bitcast A* to B*
/// Y = gep X, <...constant indices...>
/// into a gep of the original struct. This is important for SROA and alias
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD &&
- !isa<BitCastInst>(BCI->getOperand(0)) &&
+ Value *Operand = BCI->getOperand(0);
+ PointerType *OpType = cast<PointerType>(Operand->getType());
+ unsigned OffsetBits = TD->getPointerTypeSizeInBits(OpType);
+ APInt Offset(OffsetBits, 0);
+ if (!isa<BitCastInst>(Operand) &&
GEP.accumulateConstantOffset(*TD, Offset) &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1335,8 +1366,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Offset) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
- if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isAllocationFn(BCI->getOperand(0), TLI)) {
+ if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -1347,19 +1377,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return &GEP;
}
}
- return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ return new BitCastInst(Operand, GEP.getType());
}
// Otherwise, if the offset is non-zero, we need to find out if there is a
// field at Offset in 'A's type. If so, we can pull the cast through the
// GEP.
SmallVector<Value*, 8> NewIndices;
- Type *InTy =
- cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
- if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) {
+ if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
- Builder->CreateGEP(BCI->getOperand(0), NewIndices);
+ Builder->CreateInBoundsGEP(Operand, NewIndices) :
+ Builder->CreateGEP(Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -1372,8 +1400,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return 0;
}
-
-
static bool
isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
const TargetLibraryInfo *TLI) {
@@ -2209,7 +2235,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// DCE instruction if trivially dead.
if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
- DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
continue;
}
@@ -2217,7 +2243,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
@@ -2293,7 +2319,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = false;
- DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
{
@@ -2338,7 +2364,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I, TLI)) {
- DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
EraseInstFromFunction(*I);
++NumDeadInst;
MadeIRChange = true;
@@ -2348,7 +2374,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
ReplaceInstUsesWith(*I, C);
@@ -2396,13 +2422,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
std::string OrigI;
#endif
DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
- DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+ DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
if (Instruction *Result = visit(*I)) {
++NumCombined;
// Should we replace the old instruction with a new one?
if (Result != I) {
- DEBUG(errs() << "IC: Old = " << *I << '\n'
+ DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
if (!I->getDebugLoc().isUnknown())
@@ -2431,7 +2457,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
EraseInstFromFunction(*I);
} else {
#ifndef NDEBUG
- DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
<< " New = " << *I << '\n');
#endif
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index d77e20b..d731ec5 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DIBuilder.h"
@@ -59,6 +60,7 @@ static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa8000;
+static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
@@ -75,21 +77,30 @@ static const char *const kAsanUnregisterGlobalsName =
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *const kAsanInitName = "__asan_init_v3";
+static const char *const kAsanCovName = "__sanitizer_cov";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *const kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *const kAsanMappingScaleName = "__asan_mapping_scale";
-static const char *const kAsanStackMallocName = "__asan_stack_malloc";
-static const char *const kAsanStackFreeName = "__asan_stack_free";
+static const int kMaxAsanStackMallocSizeClass = 10;
+static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
+static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
static const char *const kAsanGenPrefix = "__asan_gen_";
static const char *const kAsanPoisonStackMemoryName =
"__asan_poison_stack_memory";
static const char *const kAsanUnpoisonStackMemoryName =
"__asan_unpoison_stack_memory";
+static const char *const kAsanOptionDetectUAR =
+ "__asan_option_detect_stack_use_after_return";
+
+// These constants must match the definitions in the run-time library.
static const int kAsanStackLeftRedzoneMagic = 0xf1;
static const int kAsanStackMidRedzoneMagic = 0xf2;
static const int kAsanStackRightRedzoneMagic = 0xf3;
static const int kAsanStackPartialRedzoneMagic = 0xf4;
+#ifndef NDEBUG
+static const int kAsanStackAfterReturnMagic = 0xf5;
+#endif
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
@@ -124,6 +135,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClCoverage("asan-coverage",
+ cl::desc("ASan coverage"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
@@ -184,6 +197,13 @@ static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
cl::Hidden, cl::init(-1));
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOptimizedAccessesToGlobalArray,
+ "Number of optimized accesses to global arrays");
+STATISTIC(NumOptimizedAccessesToGlobalVar,
+ "Number of optimized accesses to global vars");
+
namespace {
/// A set of dynamically initialized globals extracted from metadata.
class SetOfDynamicallyInitializedGlobals {
@@ -306,6 +326,8 @@ struct AddressSanitizer : public FunctionPass {
bool ShouldInstrumentGlobal(GlobalVariable *G);
bool LooksLikeCodeInBug11395(Instruction *I);
void FindDynamicInitializers(Module &M);
+ bool GlobalIsLinkerInitialized(GlobalVariable *G);
+ bool InjectCoverage(Function &F);
bool CheckInitOrder;
bool CheckUseAfterReturn;
@@ -321,6 +343,7 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanCtorFunction;
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
+ Function *AsanCovFunction;
OwningPtr<SpecialCaseList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
@@ -396,12 +419,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
uint64_t TotalStackSize;
unsigned StackAlignment;
- Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+ Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
+ *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
struct AllocaPoisonCall {
IntrinsicInst *InsBefore;
+ AllocaInst *AI;
uint64_t Size;
bool DoPoison;
};
@@ -480,7 +505,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
if (!AI) return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
- AllocaPoisonCall APC = {&II, SizeValue, DoPoison};
+ AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
AllocaPoisonCallVec.push_back(APC);
}
@@ -488,7 +513,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void initializeCallbacks(Module &M);
// Check if we want (and can) handle this alloca.
- bool isInterestingAlloca(AllocaInst &AI) {
+ bool isInterestingAlloca(AllocaInst &AI) const {
return (!AI.isArrayAllocation() &&
AI.isStaticAlloca() &&
AI.getAlignment() <= RedzoneSize() &&
@@ -498,24 +523,27 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
size_t RedzoneSize() const {
return RedzoneSizeForScale(Mapping.Scale);
}
- uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty);
return SizeInBytes;
}
- uint64_t getAlignedSize(uint64_t SizeInBytes) {
+ uint64_t getAlignedSize(uint64_t SizeInBytes) const {
size_t RZ = RedzoneSize();
return ((SizeInBytes + RZ - 1) / RZ) * RZ;
}
- uint64_t getAlignedAllocaSize(AllocaInst *AI) {
+ uint64_t getAlignedAllocaSize(AllocaInst *AI) const {
uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
return getAlignedSize(SizeInBytes);
}
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
- void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
+ void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB,
Value *ShadowBase, bool DoPoison);
- void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison);
+ void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison);
+
+ void SetShadowToStackAfterReturnInlined(IRBuilder<> &IRB, Value *ShadowBase,
+ int Size);
};
} // namespace
@@ -642,6 +670,13 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
+bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
+  // If a global variable does not have dynamic initialization we don't
+  // have to instrument it. However, if a global does not have an initializer
+  // at all, we assume it has a dynamic initializer (in another TU).
+ return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G);
+}
+
void AddressSanitizer::instrumentMop(Instruction *I) {
bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
@@ -650,13 +685,19 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
- if (!CheckInitOrder)
- return;
- // If a global variable does not have dynamic initialization we don't
- // have to instrument it. However, if a global does not have initailizer
- // at all, we assume it has dynamic initializer (in other TU).
- if (G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G))
+ if (!CheckInitOrder || GlobalIsLinkerInitialized(G)) {
+ NumOptimizedAccessesToGlobalVar++;
return;
+ }
+ }
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr);
+ if (CE && CE->isGEPWithNoNotionalOverIndexing()) {
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+ if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) {
+ NumOptimizedAccessesToGlobalArray++;
+ return;
+ }
+ }
}
}
@@ -668,6 +709,11 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
assert((TypeSize % 8) == 0);
+ if (IsWrite)
+ NumInstrumentedWrites++;
+ else
+ NumInstrumentedReads++;
+
// Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
if (TypeSize == 8 || TypeSize == 16 ||
TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
@@ -883,7 +929,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new SpecialCaseList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
if (BL->isIn(M)) return false;
C = &(M.getContext());
int LongSize = TD->getPointerSizeInBits();
@@ -914,8 +960,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
IntptrTy, IntptrTy, NULL);
- SmallVector<Constant *, 16> Initializers(n), DynamicInit;
-
+ SmallVector<Constant *, 16> Initializers(n);
Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
assert(CtorFunc);
@@ -1046,6 +1091,8 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+ AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanCovName, IRB.getVoidTy(), IntptrTy, NULL));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -1076,7 +1123,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (!TD)
return false;
- BL.reset(new SpecialCaseList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
DynamicallyInitializedGlobals.Init(M);
C = &(M.getContext());
@@ -1117,6 +1164,47 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
+// Poor man's coverage that works with ASan.
+// We create a Guard boolean variable with the same linkage
+// as the function and inject this code into the entry block:
+// if (!*Guard) {
+// __sanitizer_cov(&F);
+// *Guard = 1;
+// }
+// The accesses to Guard are atomic. The rest of the logic is
+// in __sanitizer_cov (it's fine to call it more than once).
+//
+// This coverage implementation provides very limited data:
+// it only tells if a given function was ever executed.
+// No counters, no per-basic-block or per-edge data.
+// But for many use cases this is what we need and the added slowdown
+// is negligible. This simple implementation will probably be obsoleted
+// by the upcoming Clang-based coverage implementation.
+// By having it here and now we hope to
+// a) get the functionality to users earlier and
+// b) collect usage statistics to help improve Clang coverage design.
+bool AddressSanitizer::InjectCoverage(Function &F) {
+ if (!ClCoverage) return false;
+ IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
+ Type *Int8Ty = IRB.getInt8Ty();
+ GlobalVariable *Guard = new GlobalVariable(
+ *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName());
+ LoadInst *Load = IRB.CreateLoad(Guard);
+ Load->setAtomic(Monotonic);
+ Load->setAlignment(1);
+ Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
+ Instruction *Ins = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ IRB.SetInsertPoint(Ins);
+ // We pass &F to __sanitizer_cov. We could avoid this and rely on
+ // GET_CALLER_PC, but having the PC of the first instruction is just nice.
+ IRB.CreateCall(AsanCovFunction, IRB.CreatePointerCast(&F, IntptrTy));
+ StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
+ Store->setAtomic(Monotonic);
+ Store->setAlignment(1);
+ return true;
+}
+
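At the source level, the injected entry-block code behaves roughly like the sketch below. This is illustrative only: the guard name and the exact runtime signature of __sanitizer_cov are assumptions, and LLVM's Monotonic ordering is rendered here as relaxed atomics.

#include <atomic>
#include <cstdint>

extern "C" void __sanitizer_cov(uintptr_t pc);   // assumed runtime entry point

static std::atomic<unsigned char> guard{0};      // one guard variable per function

void instrumented_entry(void *thisFunction) {
  if (guard.load(std::memory_order_relaxed) == 0) {       // atomic load, alignment 1
    __sanitizer_cov(reinterpret_cast<uintptr_t>(thisFunction));
    guard.store(1, std::memory_order_relaxed);             // atomic store, alignment 1
  }
}
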
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
@@ -1212,6 +1300,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
}
bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+
+ if (InjectCoverage(F))
+ res = true;
+
DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n");
if (ClKeepUninstrumented) {
@@ -1271,11 +1363,15 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
- AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
- AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackFreeName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy, NULL));
+ for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
+ std::string Suffix = itostr(i);
+ AsanStackMallocFunc[i] = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
+ IntptrTy, IntptrTy, NULL));
+ AsanStackFreeFunc[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy,
+ IntptrTy, IntptrTy, NULL));
+ }
AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
@@ -1283,7 +1379,7 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
}
void FunctionStackPoisoner::poisonRedZones(
- const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase,
+ const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB, Value *ShadowBase,
bool DoPoison) {
size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale;
assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
@@ -1344,12 +1440,40 @@ void FunctionStackPoisoner::poisonRedZones(
}
}
+// The fake stack allocator (asan_fake_stack.h) has 11 size classes, one for
+// each power of 2 from kMinStackMallocSize up to kMaxStackMallocSize; the
+// largest class index is kMaxAsanStackMallocSizeClass.
+static int StackMallocSizeClass(uint64_t LocalStackSize) {
+ assert(LocalStackSize <= kMaxStackMallocSize);
+ uint64_t MaxSize = kMinStackMallocSize;
+ for (int i = 0; ; i++, MaxSize *= 2)
+ if (LocalStackSize <= MaxSize)
+ return i;
+ llvm_unreachable("impossible LocalStackSize");
+}
+
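A few concrete values for this mapping, assuming kMinStackMallocSize = 64 and kMaxStackMallocSize = 64K as declared earlier in this file (a standalone restatement, not the pass code):

#include <cassert>
#include <cstdint>

static int stackMallocSizeClass(uint64_t LocalStackSize) {
  assert(LocalStackSize <= 65536);          // kMaxStackMallocSize
  uint64_t MaxSize = 64;                    // kMinStackMallocSize
  for (int i = 0; ; i++, MaxSize *= 2)
    if (LocalStackSize <= MaxSize)
      return i;
}

int main() {
  assert(stackMallocSizeClass(1) == 0);       // anything up to 64 bytes -> class 0
  assert(stackMallocSizeClass(65) == 1);      // 65..128 bytes -> class 1
  assert(stackMallocSizeClass(4096) == 6);    // 64 * 2^6
  assert(stackMallocSizeClass(65536) == 10);  // kMaxStackMallocSize -> last class
  return 0;
}
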
+// Set Size bytes starting from ShadowBase to kAsanStackAfterReturnMagic.
+// We cannot use the memset intrinsic because it may end up calling the actual
+// memset. Size is a multiple of 8.
+// Currently this generates 8-byte stores on x86_64; it may be better to
+// generate wider stores.
+void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
+ IRBuilder<> &IRB, Value *ShadowBase, int Size) {
+ assert(!(Size % 8));
+ assert(kAsanStackAfterReturnMagic == 0xf5);
+ for (int i = 0; i < Size; i += 8) {
+ Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
+ IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL),
+ IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
+ }
+}
+
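What the inlined poisoning amounts to at run time: fill Size shadow bytes (a multiple of 8) with the 0xf5 magic using one 8-byte store per iteration. A host-side sketch, not the instrumentation itself:

#include <cassert>
#include <cstdint>
#include <cstring>

static void poisonShadow(uint8_t *ShadowBase, int Size) {
  const uint64_t Magic = 0xf5f5f5f5f5f5f5f5ULL;   // kAsanStackAfterReturnMagic x 8
  for (int i = 0; i < Size; i += 8)
    std::memcpy(ShadowBase + i, &Magic, 8);       // one 8-byte store per iteration
}

int main() {
  uint8_t Shadow[32];
  poisonShadow(Shadow, sizeof(Shadow));
  for (uint8_t B : Shadow)
    assert(B == 0xf5);
  return 0;
}
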
void FunctionStackPoisoner::poisonStack() {
uint64_t LocalStackSize = TotalStackSize +
(AllocaVec.size() + 1) * RedzoneSize();
bool DoStackMalloc = ASan.CheckUseAfterReturn
&& LocalStackSize <= kMaxStackMallocSize;
+ int StackMallocIdx = -1;
assert(AllocaVec.size() > 0);
Instruction *InsBefore = AllocaVec[0];
@@ -1367,8 +1491,28 @@ void FunctionStackPoisoner::poisonStack() {
Value *LocalStackBase = OrigStackBase;
if (DoStackMalloc) {
- LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
+ // LocalStackBase = OrigStackBase
+ // if (__asan_option_detect_stack_use_after_return)
+ // LocalStackBase = __asan_stack_malloc_N(LocalStackBase, OrigStackBase);
+ StackMallocIdx = StackMallocSizeClass(LocalStackSize);
+ assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass);
+ Constant *OptionDetectUAR = F.getParent()->getOrInsertGlobal(
+ kAsanOptionDetectUAR, IRB.getInt32Ty());
+ Value *Cmp = IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUAR),
+ Constant::getNullValue(IRB.getInt32Ty()));
+ Instruction *Term =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ BasicBlock *CmpBlock = cast<Instruction>(Cmp)->getParent();
+ IRBuilder<> IRBIf(Term);
+ LocalStackBase = IRBIf.CreateCall2(
+ AsanStackMallocFunc[StackMallocIdx],
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
+ BasicBlock *SetBlock = cast<Instruction>(LocalStackBase)->getParent();
+ IRB.SetInsertPoint(InsBefore);
+ PHINode *Phi = IRB.CreatePHI(IntptrTy, 2);
+ Phi->addIncoming(OrigStackBase, CmpBlock);
+ Phi->addIncoming(LocalStackBase, SetBlock);
+ LocalStackBase = Phi;
}
// This string will be parsed by the run-time (DescribeAddressIfStack).
@@ -1380,11 +1524,10 @@ void FunctionStackPoisoner::poisonStack() {
bool HavePoisonedAllocas = false;
for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) {
const AllocaPoisonCall &APC = AllocaPoisonCallVec[i];
- IntrinsicInst *II = APC.InsBefore;
- AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
- assert(AI);
- IRBuilder<> IRB(II);
- poisonAlloca(AI, APC.Size, IRB, APC.DoPoison);
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ IRBuilder<> IRB(APC.InsBefore);
+ poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
HavePoisonedAllocas |= APC.DoPoison;
}
@@ -1442,10 +1585,35 @@ void FunctionStackPoisoner::poisonStack() {
// Unpoison the stack.
poisonRedZones(AllocaVec, IRBRet, ShadowBase, false);
if (DoStackMalloc) {
+ assert(StackMallocIdx >= 0);
// In use-after-return mode, mark the whole stack frame unaddressable.
- IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
- ConstantInt::get(IntptrTy, LocalStackSize),
- OrigStackBase);
+ if (StackMallocIdx <= 4) {
+ // For small sizes inline the whole thing:
+ // if LocalStackBase != OrigStackBase:
+ // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize);
+ // **SavedFlagPtr(LocalStackBase) = 0
+ // FIXME: if LocalStackBase != OrigStackBase don't call poisonRedZones.
+ Value *Cmp = IRBRet.CreateICmpNE(LocalStackBase, OrigStackBase);
+ TerminatorInst *PoisonTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ IRBuilder<> IRBPoison(PoisonTerm);
+ int ClassSize = kMinStackMallocSize << StackMallocIdx;
+ SetShadowToStackAfterReturnInlined(IRBPoison, ShadowBase,
+ ClassSize >> Mapping.Scale);
+ Value *SavedFlagPtrPtr = IRBPoison.CreateAdd(
+ LocalStackBase,
+ ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
+ Value *SavedFlagPtr = IRBPoison.CreateLoad(
+ IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
+ IRBPoison.CreateStore(
+ Constant::getNullValue(IRBPoison.getInt8Ty()),
+ IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
+ } else {
+ // For larger frames call __asan_stack_free_*.
+ IRBRet.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase,
+ ConstantInt::get(IntptrTy, LocalStackSize),
+ OrigStackBase);
+ }
} else if (HavePoisonedAllocas) {
// If we poisoned some allocas in llvm.lifetime analysis,
// unpoison whole stack frame now.
@@ -1460,7 +1628,7 @@ void FunctionStackPoisoner::poisonStack() {
}
void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
- IRBuilder<> IRB, bool DoPoison) {
+ IRBuilder<> &IRB, bool DoPoison) {
// For now just insert the call to ASan runtime.
Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
Value *SizeArg = ConstantInt::get(IntptrTy, Size);
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index b094d42..7a9f0f6 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -80,7 +80,7 @@ BasicBlock *BoundsChecking::getTrapBB() {
return TrapBB;
Function *Fn = Inst->getParent()->getParent();
- BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint();
+ IRBuilder<>::InsertPointGuard Guard(*Builder);
TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
Builder->SetInsertPoint(TrapBB);
@@ -91,7 +91,6 @@ BasicBlock *BoundsChecking::getTrapBB() {
TrapCall->setDebugLoc(Inst->getDebugLoc());
Builder->CreateUnreachable();
- Builder->SetInsertPoint(PrevInsertPoint);
return TrapBB;
}
@@ -173,7 +172,8 @@ bool BoundsChecking::runOnFunction(Function &F) {
TrapBB = 0;
BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
Builder = &TheBuilder;
- ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext());
+ ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext(),
+ /*RoundToAlign=*/true);
ObjSizeEval = &TheObjSizeEval;
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 5e34863..3563593 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,14 +1,11 @@
add_llvm_library(LLVMInstrumentation
AddressSanitizer.cpp
BoundsChecking.cpp
+ DataFlowSanitizer.cpp
DebugIR.cpp
- EdgeProfiling.cpp
GCOVProfiling.cpp
MemorySanitizer.cpp
Instrumentation.cpp
- OptimalEdgeProfiling.cpp
- PathProfiling.cpp
- ProfilingUtils.cpp
ThreadSanitizer.cpp
)
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
new file mode 100644
index 0000000..9b9e725
--- /dev/null
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -0,0 +1,1397 @@
+//===-- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
+/// analysis.
+///
+/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
+/// class of bugs on its own. Instead, it provides a generic dynamic data flow
+/// analysis framework to be used by clients to help detect application-specific
+/// issues within their own code.
+///
+/// The analysis is based on automatic propagation of data flow labels (also
+/// known as taint labels) through a program as it performs computation. Each
+/// byte of application memory is backed by two bytes of shadow memory which
+/// hold the label. On Linux/x86_64, memory is laid out as follows:
+///
+/// +--------------------+ 0x800000000000 (top of memory)
+/// | application memory |
+/// +--------------------+ 0x700000008000 (kAppAddr)
+/// | |
+/// | unused |
+/// | |
+/// +--------------------+ 0x200200000000 (kUnusedAddr)
+/// | union table |
+/// +--------------------+ 0x200000000000 (kUnionTableAddr)
+/// | shadow memory |
+/// +--------------------+ 0x000000010000 (kShadowAddr)
+/// | reserved by kernel |
+/// +--------------------+ 0x000000000000
+///
+/// To derive a shadow memory address from an application memory address,
+/// bits 44-46 are cleared to bring the address into the range
+/// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to
+/// account for the double byte representation of shadow labels and move the
+/// address into the shadow memory range. See the function
+/// DataFlowSanitizer::getShadowAddress below.
+///
+/// For more information, please refer to the design document:
+/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
+
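A worked example of the shadow address computation described above, under the Linux/x86_64 layout in the diagram (clear bits 44-46, then scale by 2 for the two-byte labels). This is standalone arithmetic for illustration, not the pass's IR-building code:

#include <cassert>
#include <cstdint>

static uint64_t shadowFor(uint64_t AppAddr) {
  return (AppAddr & ~0x700000000000ULL) * 2;   // clear bits 44-46, scale by label width
}

int main() {
  // The lowest application address maps to the start of shadow memory (kShadowAddr).
  assert(shadowFor(0x700000008000ULL) == 0x000000010000ULL);
  // Shadow addresses stay below the union table at kUnionTableAddr.
  assert(shadowFor(0x7fffffffffffULL) < 0x200000000000ULL);
  return 0;
}
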
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
+#include <iterator>
+
+using namespace llvm;
+
+// The -dfsan-preserve-alignment flag controls whether this pass assumes that
+// alignment requirements provided by the input IR are correct. For example,
+// if the input IR contains a load with alignment 8, this flag will cause
+// the shadow load to have alignment 16. This flag is disabled by default as
+// we have unfortunately encountered too much code (including Clang itself;
+// see PR14291) which performs misaligned access.
+static cl::opt<bool> ClPreserveAlignment(
+ "dfsan-preserve-alignment",
+ cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
+ cl::init(false));
+
+// The ABI list file controls how shadow parameters are passed. The pass treats
+// every function labelled "uninstrumented" in the ABI list file as conforming
+// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
+// additional annotations for those functions, a call to one of those functions
+// will produce a warning message, as the labelling behaviour of the function is
+// unknown. The other supported annotations are "functional" and "discard",
+// which are described below under DataFlowSanitizer::WrapperKind.
+static cl::opt<std::string> ClABIListFile(
+ "dfsan-abilist",
+ cl::desc("File listing native ABI functions and how the pass treats them"),
+ cl::Hidden);
+
+// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
+// functions (see DataFlowSanitizer::InstrumentedABI below).
+static cl::opt<bool> ClArgsABI(
+ "dfsan-args-abi",
+ cl::desc("Use the argument ABI rather than the TLS ABI"),
+ cl::Hidden);
+
+static cl::opt<bool> ClDebugNonzeroLabels(
+ "dfsan-debug-nonzero-labels",
+ cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
+ "load or return with a nonzero label"),
+ cl::Hidden);
+
+namespace {
+
+class DataFlowSanitizer : public ModulePass {
+ friend struct DFSanFunction;
+ friend class DFSanVisitor;
+
+ enum {
+ ShadowWidth = 16
+ };
+
+ /// Which ABI should be used for instrumented functions?
+ enum InstrumentedABI {
+ /// Argument and return value labels are passed through additional
+ /// arguments and by modifying the return type.
+ IA_Args,
+
+ /// Argument and return value labels are passed through TLS variables
+ /// __dfsan_arg_tls and __dfsan_retval_tls.
+ IA_TLS
+ };
+
+ /// How should calls to uninstrumented functions be handled?
+ enum WrapperKind {
+ /// This function is present in an uninstrumented form but we don't know
+ /// how it should be handled. Print a warning and call the function anyway.
+ /// Don't label the return value.
+ WK_Warning,
+
+ /// This function does not write to (user-accessible) memory, and its return
+ /// value is unlabelled.
+ WK_Discard,
+
+ /// This function does not write to (user-accessible) memory, and the label
+ /// of its return value is the union of the label of its arguments.
+ WK_Functional,
+
+ /// Instead of calling the function, a custom wrapper __dfsw_F is called,
+ /// where F is the name of the function. This function may wrap the
+ /// original function or provide its own implementation. This is similar to
+ /// the IA_Args ABI, except that IA_Args uses a struct return type to
+ /// pass the return value shadow in a register, while WK_Custom uses an
+ /// extra pointer argument to return the shadow. This allows the wrapped
+ /// form of the function type to be expressed in C.
+ WK_Custom
+ };
+
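To make WK_Custom concrete: for a function such as int f(int), the pass would call a wrapper named __dfsw_f that also receives the argument's label and a pointer through which to return the result's label. The label type below (a 16-bit integer, matching ShadowWidth) and the exact declaration shape are an illustration, not definitions from this patch:

#include <cstdint>

typedef uint16_t dfsan_label;   // 16-bit shadow labels (ShadowWidth = 16)

// Original function the application calls:
extern "C" int f(int x);

// Custom wrapper the instrumented code would call instead of f:
extern "C" int __dfsw_f(int x, dfsan_label x_label, dfsan_label *ret_label);
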
+ DataLayout *DL;
+ Module *Mod;
+ LLVMContext *Ctx;
+ IntegerType *ShadowTy;
+ PointerType *ShadowPtrTy;
+ IntegerType *IntptrTy;
+ ConstantInt *ZeroShadow;
+ ConstantInt *ShadowPtrMask;
+ ConstantInt *ShadowPtrMul;
+ Constant *ArgTLS;
+ Constant *RetvalTLS;
+ void *(*GetArgTLSPtr)();
+ void *(*GetRetvalTLSPtr)();
+ Constant *GetArgTLS;
+ Constant *GetRetvalTLS;
+ FunctionType *DFSanUnionFnTy;
+ FunctionType *DFSanUnionLoadFnTy;
+ FunctionType *DFSanUnimplementedFnTy;
+ FunctionType *DFSanSetLabelFnTy;
+ FunctionType *DFSanNonzeroLabelFnTy;
+ Constant *DFSanUnionFn;
+ Constant *DFSanUnionLoadFn;
+ Constant *DFSanUnimplementedFn;
+ Constant *DFSanSetLabelFn;
+ Constant *DFSanNonzeroLabelFn;
+ MDNode *ColdCallWeights;
+ OwningPtr<SpecialCaseList> ABIList;
+ DenseMap<Value *, Function *> UnwrappedFnMap;
+ AttributeSet ReadOnlyNoneAttrs;
+
+ Value *getShadowAddress(Value *Addr, Instruction *Pos);
+ Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
+ bool isInstrumented(const Function *F);
+ bool isInstrumented(const GlobalAlias *GA);
+ FunctionType *getArgsFunctionType(FunctionType *T);
+ FunctionType *getTrampolineFunctionType(FunctionType *T);
+ FunctionType *getCustomFunctionType(FunctionType *T);
+ InstrumentedABI getInstrumentedABI();
+ WrapperKind getWrapperKind(Function *F);
+ void addGlobalNamePrefix(GlobalValue *GV);
+ Function *buildWrapperFunction(Function *F, StringRef NewFName,
+ GlobalValue::LinkageTypes NewFLink,
+ FunctionType *NewFT);
+ Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
+
+ public:
+ DataFlowSanitizer(StringRef ABIListFile = StringRef(),
+ void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0);
+ static char ID;
+ bool doInitialization(Module &M);
+ bool runOnModule(Module &M);
+};
+
+struct DFSanFunction {
+ DataFlowSanitizer &DFS;
+ Function *F;
+ DataFlowSanitizer::InstrumentedABI IA;
+ bool IsNativeABI;
+ Value *ArgTLSPtr;
+ Value *RetvalTLSPtr;
+ AllocaInst *LabelReturnAlloca;
+ DenseMap<Value *, Value *> ValShadowMap;
+ DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
+ std::vector<std::pair<PHINode *, PHINode *> > PHIFixups;
+ DenseSet<Instruction *> SkipInsts;
+ DenseSet<Value *> NonZeroChecks;
+
+ DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
+ : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()),
+ IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0),
+ LabelReturnAlloca(0) {}
+ Value *getArgTLSPtr();
+ Value *getArgTLS(unsigned Index, Instruction *Pos);
+ Value *getRetvalTLS();
+ Value *getShadow(Value *V);
+ void setShadow(Instruction *I, Value *Shadow);
+ Value *combineOperandShadows(Instruction *Inst);
+ Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
+ Instruction *Pos);
+ void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow,
+ Instruction *Pos);
+};
+
+class DFSanVisitor : public InstVisitor<DFSanVisitor> {
+ public:
+ DFSanFunction &DFSF;
+ DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
+
+ void visitOperandShadowInst(Instruction &I);
+
+ void visitBinaryOperator(BinaryOperator &BO);
+ void visitCastInst(CastInst &CI);
+ void visitCmpInst(CmpInst &CI);
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitReturnInst(ReturnInst &RI);
+ void visitCallSite(CallSite CS);
+ void visitPHINode(PHINode &PN);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitMemSetInst(MemSetInst &I);
+ void visitMemTransferInst(MemTransferInst &I);
+};
+
+}
+
+char DataFlowSanitizer::ID;
+INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
+ "DataFlowSanitizer: dynamic data flow analysis.", false, false)
+
+ModulePass *llvm::createDataFlowSanitizerPass(StringRef ABIListFile,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)()) {
+ return new DataFlowSanitizer(ABIListFile, getArgTLS, getRetValTLS);
+}
+
+DataFlowSanitizer::DataFlowSanitizer(StringRef ABIListFile,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)())
+ : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS),
+ ABIList(SpecialCaseList::createOrDie(ABIListFile.empty() ? ClABIListFile
+ : ABIListFile)) {
+}
+
+FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ if (T->isVarArg())
+ ArgTypes.push_back(ShadowPtrTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ RetType = StructType::get(RetType, ShadowTy, (Type *)0);
+ return FunctionType::get(RetType, ArgTypes, T->isVarArg());
+}
+
+FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
+ assert(!T->isVarArg());
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ ArgTypes.push_back(T->getPointerTo());
+ std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ ArgTypes.push_back(ShadowPtrTy);
+ return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
+
+FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
+ assert(!T->isVarArg());
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end();
+ i != e; ++i) {
+ FunctionType *FT;
+ if (isa<PointerType>(*i) && (FT = dyn_cast<FunctionType>(cast<PointerType>(
+ *i)->getElementType()))) {
+ ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
+ ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
+ } else {
+ ArgTypes.push_back(*i);
+ }
+ }
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ ArgTypes.push_back(ShadowPtrTy);
+ return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
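A minimal sketch of how the three helpers above rewrite an example signature int(int, int), assuming a 16-bit shadow type; the alias names are illustrative:

  using shadow_t = unsigned short;                  // stands in for ShadowTy
  struct IntAndShadow { int Val; shadow_t Label; }; // stands in for the {T, shadow} struct return

  // getArgsFunctionType: one shadow appended per parameter, return value
  // paired with its shadow.
  using ArgsABITy = IntAndShadow(int, int, shadow_t, shadow_t);

  // getTrampolineFunctionType: leading pointer to the callee, trailing
  // pointer through which the return shadow is written.
  using TrampolineTy = int(int (*)(int, int), int, int, shadow_t, shadow_t,
                           shadow_t *);

  // getCustomFunctionType: like the trampoline form minus the leading callee
  // pointer; function-pointer parameters would instead become a
  // trampoline/context pair.
  using CustomTy = int(int, int, shadow_t, shadow_t, shadow_t *);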
+
+bool DataFlowSanitizer::doInitialization(Module &M) {
+ DL = getAnalysisIfAvailable<DataLayout>();
+ if (!DL)
+ return false;
+
+ Mod = &M;
+ Ctx = &M.getContext();
+ ShadowTy = IntegerType::get(*Ctx, ShadowWidth);
+ ShadowPtrTy = PointerType::getUnqual(ShadowTy);
+ IntptrTy = DL->getIntPtrType(*Ctx);
+ ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
+ ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
+ ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8);
+
+ Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy };
+ DFSanUnionFnTy =
+ FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false);
+ Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy };
+ DFSanUnionLoadFnTy =
+ FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false);
+ DFSanUnimplementedFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+ Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy };
+ DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ DFSanSetLabelArgs, /*isVarArg=*/false);
+ DFSanNonzeroLabelFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), ArrayRef<Type *>(), /*isVarArg=*/false);
+
+ if (GetArgTLSPtr) {
+ Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
+ ArgTLS = 0;
+ GetArgTLS = ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
+ PointerType::getUnqual(
+ FunctionType::get(PointerType::getUnqual(ArgTLSTy), (Type *)0)));
+ }
+ if (GetRetvalTLSPtr) {
+ RetvalTLS = 0;
+ GetRetvalTLS = ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
+ PointerType::getUnqual(
+ FunctionType::get(PointerType::getUnqual(ShadowTy), (Type *)0)));
+ }
+
+ ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
+ return true;
+}
+
+bool DataFlowSanitizer::isInstrumented(const Function *F) {
+ return !ABIList->isIn(*F, "uninstrumented");
+}
+
+bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
+ return !ABIList->isIn(*GA, "uninstrumented");
+}
+
+DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
+ return ClArgsABI ? IA_Args : IA_TLS;
+}
+
+DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
+ if (ABIList->isIn(*F, "functional"))
+ return WK_Functional;
+ if (ABIList->isIn(*F, "discard"))
+ return WK_Discard;
+ if (ABIList->isIn(*F, "custom"))
+ return WK_Custom;
+
+ return WK_Warning;
+}
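The categories above are read from the ABI list; a hypothetical excerpt in SpecialCaseList syntax might look like the following (the specific entries are illustrative, not taken from this patch):

  # each function is marked uninstrumented plus one wrapper-kind category
  fun:memcmp=uninstrumented
  fun:memcmp=custom
  fun:ctime=uninstrumented
  fun:ctime=discard
  fun:sqrt=uninstrumented
  fun:sqrt=functional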
+
+void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
+ std::string GVName = GV->getName(), Prefix = "dfs$";
+ GV->setName(Prefix + GVName);
+
+ // Try to change the name of the function in module inline asm. We only do
+ // this for specific asm directives, currently only ".symver", to try to avoid
+ // corrupting asm which happens to contain the symbol name as a substring.
+ // Note that the substitution for .symver assumes that the versioned symbol
+ // also has an instrumented name.
+ std::string Asm = GV->getParent()->getModuleInlineAsm();
+ std::string SearchStr = ".symver " + GVName + ",";
+ size_t Pos = Asm.find(SearchStr);
+ if (Pos != std::string::npos) {
+ Asm.replace(Pos, SearchStr.size(),
+ ".symver " + Prefix + GVName + "," + Prefix);
+ GV->getParent()->setModuleInlineAsm(Asm);
+ }
+}
+
+Function *
+DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
+ GlobalValue::LinkageTypes NewFLink,
+ FunctionType *NewFT) {
+ FunctionType *FT = F->getFunctionType();
+ Function *NewF = Function::Create(NewFT, NewFLink, NewFName,
+ F->getParent());
+ NewF->copyAttributesFrom(F);
+ NewF->removeAttributes(
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
+ AttributeSet::ReturnIndex));
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
+ std::vector<Value *> Args;
+ unsigned n = FT->getNumParams();
+ for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
+ Args.push_back(&*ai);
+ CallInst *CI = CallInst::Create(F, Args, "", BB);
+ if (FT->getReturnType()->isVoidTy())
+ ReturnInst::Create(*Ctx, BB);
+ else
+ ReturnInst::Create(*Ctx, CI, BB);
+
+ return NewF;
+}
+
+Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
+ StringRef FName) {
+ FunctionType *FTT = getTrampolineFunctionType(FT);
+ Constant *C = Mod->getOrInsertFunction(FName, FTT);
+ Function *F = dyn_cast<Function>(C);
+ if (F && F->isDeclaration()) {
+ F->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ std::vector<Value *> Args;
+ Function::arg_iterator AI = F->arg_begin(); ++AI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
+ Args.push_back(&*AI);
+ CallInst *CI =
+ CallInst::Create(&F->getArgumentList().front(), Args, "", BB);
+ ReturnInst *RI;
+ if (FT->getReturnType()->isVoidTy())
+ RI = ReturnInst::Create(*Ctx, BB);
+ else
+ RI = ReturnInst::Create(*Ctx, CI, BB);
+
+ DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
+ Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
+ DFSF.ValShadowMap[ValAI] = ShadowAI;
+ DFSanVisitor(DFSF).visitCallInst(*CI);
+ if (!FT->getReturnType()->isVoidTy())
+ new StoreInst(DFSF.getShadow(RI->getReturnValue()),
+ &F->getArgumentList().back(), RI);
+ }
+
+ return C;
+}
+
+bool DataFlowSanitizer::runOnModule(Module &M) {
+ if (!DL)
+ return false;
+
+ if (ABIList->isIn(M, "skip"))
+ return false;
+
+ if (!GetArgTLSPtr) {
+ Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
+ ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS))
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ }
+ if (!GetRetvalTLSPtr) {
+ RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS))
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ }
+
+ DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
+ F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ F->addAttribute(1, Attribute::ZExt);
+ F->addAttribute(2, Attribute::ZExt);
+ }
+ DFSanUnionLoadFn =
+ Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ }
+ DFSanUnimplementedFn =
+ Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
+ DFSanSetLabelFn =
+ Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) {
+ F->addAttribute(1, Attribute::ZExt);
+ }
+ DFSanNonzeroLabelFn =
+ Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
+
+ std::vector<Function *> FnsToInstrument;
+ llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI;
+ for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) {
+ if (!i->isIntrinsic() &&
+ i != DFSanUnionFn &&
+ i != DFSanUnionLoadFn &&
+ i != DFSanUnimplementedFn &&
+ i != DFSanSetLabelFn &&
+ i != DFSanNonzeroLabelFn)
+ FnsToInstrument.push_back(&*i);
+ }
+
+ // Give function aliases prefixes when necessary, and build wrappers where the
+ // instrumentedness is inconsistent.
+ for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) {
+ GlobalAlias *GA = &*i;
+ ++i;
+ // Don't stop on weak. We assume people aren't playing games with the
+ // instrumentedness of overridden weak aliases.
+ if (Function *F = dyn_cast<Function>(
+ GA->resolveAliasedGlobal(/*stopOnWeak=*/false))) {
+ bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
+ if (GAInst && FInst) {
+ addGlobalNamePrefix(GA);
+ } else if (GAInst != FInst) {
+ // Non-instrumented alias of an instrumented function, or vice versa.
+ // Replace the alias with a native-ABI wrapper of the aliasee. The pass
+ // below will take care of instrumenting it.
+ Function *NewF =
+ buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
+ GA->replaceAllUsesWith(NewF);
+ NewF->takeName(GA);
+ GA->eraseFromParent();
+ FnsToInstrument.push_back(NewF);
+ }
+ }
+ }
+
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+ ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B);
+
+ // First, change the ABI of every function in the module. ABI-listed
+ // functions keep their original ABI and get a wrapper function.
+ for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
+ e = FnsToInstrument.end();
+ i != e; ++i) {
+ Function &F = **i;
+ FunctionType *FT = F.getFunctionType();
+
+ bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
+ FT->getReturnType()->isVoidTy());
+
+ if (isInstrumented(&F)) {
+ // Instrumented functions get a 'dfs$' prefix. This allows us to more
+ // easily identify cases of mismatching ABIs.
+ if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
+ FunctionType *NewFT = getArgsFunctionType(FT);
+ Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
+ NewF->copyAttributesFrom(&F);
+ NewF->removeAttributes(
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
+ AttributeSet::ReturnIndex));
+ for (Function::arg_iterator FArg = F.arg_begin(),
+ NewFArg = NewF->arg_begin(),
+ FArgEnd = F.arg_end();
+ FArg != FArgEnd; ++FArg, ++NewFArg) {
+ FArg->replaceAllUsesWith(NewFArg);
+ }
+ NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
+
+ for (Function::use_iterator ui = F.use_begin(), ue = F.use_end();
+ ui != ue;) {
+ BlockAddress *BA = dyn_cast<BlockAddress>(ui.getUse().getUser());
+ ++ui;
+ if (BA) {
+ BA->replaceAllUsesWith(
+ BlockAddress::get(NewF, BA->getBasicBlock()));
+ delete BA;
+ }
+ }
+ F.replaceAllUsesWith(
+ ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
+ NewF->takeName(&F);
+ F.eraseFromParent();
+ *i = NewF;
+ addGlobalNamePrefix(NewF);
+ } else {
+ addGlobalNamePrefix(&F);
+ }
+ // Hopefully, nobody will try to indirectly call a vararg
+ // function... yet.
+ } else if (FT->isVarArg()) {
+ UnwrappedFnMap[&F] = &F;
+ *i = 0;
+ } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
+ // Build a wrapper function for F. The wrapper simply calls F, and is
+ // added to FnsToInstrument so that any instrumentation according to its
+ // WrapperKind is done in the second pass below.
+ FunctionType *NewFT = getInstrumentedABI() == IA_Args
+ ? getArgsFunctionType(FT)
+ : FT;
+ Function *NewF = buildWrapperFunction(
+ &F, std::string("dfsw$") + std::string(F.getName()),
+ GlobalValue::LinkOnceODRLinkage, NewFT);
+ if (getInstrumentedABI() == IA_TLS)
+ NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs);
+
+ Value *WrappedFnCst =
+ ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
+ F.replaceAllUsesWith(WrappedFnCst);
+ UnwrappedFnMap[WrappedFnCst] = &F;
+ *i = NewF;
+
+ if (!F.isDeclaration()) {
+ // This function is probably defining an interposition of an
+ // uninstrumented function and hence needs to keep the original ABI.
+ // But any functions it may call need to use the instrumented ABI, so
+ // we instrument it in a mode which preserves the original ABI.
+ FnsWithNativeABI.insert(&F);
+
+ // This code needs to rebuild the iterators, as they may be invalidated
+ // by the push_back, taking care that the new range does not include
+ // any functions added by this code.
+ size_t N = i - FnsToInstrument.begin(),
+ Count = e - FnsToInstrument.begin();
+ FnsToInstrument.push_back(&F);
+ i = FnsToInstrument.begin() + N;
+ e = FnsToInstrument.begin() + Count;
+ }
+ }
+ }
+
+ for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
+ e = FnsToInstrument.end();
+ i != e; ++i) {
+ if (!*i || (*i)->isDeclaration())
+ continue;
+
+ removeUnreachableBlocks(**i);
+
+ DFSanFunction DFSF(*this, *i, FnsWithNativeABI.count(*i));
+
+ // DFSanVisitor may create new basic blocks, which confuses df_iterator.
+ // Build a copy of the list before iterating over it.
+ llvm::SmallVector<BasicBlock *, 4> BBList;
+ std::copy(df_begin(&(*i)->getEntryBlock()), df_end(&(*i)->getEntryBlock()),
+ std::back_inserter(BBList));
+
+ for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(),
+ e = BBList.end();
+ i != e; ++i) {
+ Instruction *Inst = &(*i)->front();
+ while (1) {
+ // DFSanVisitor may split the current basic block, changing the current
+ // instruction's next pointer and moving the next instruction to the
+ // tail block from which we should continue.
+ Instruction *Next = Inst->getNextNode();
+ // DFSanVisitor may delete Inst, so keep track of whether it was a
+ // terminator.
+ bool IsTerminator = isa<TerminatorInst>(Inst);
+ if (!DFSF.SkipInsts.count(Inst))
+ DFSanVisitor(DFSF).visit(Inst);
+ if (IsTerminator)
+ break;
+ Inst = Next;
+ }
+ }
+
+ // We will not necessarily be able to compute the shadow for every phi node
+ // until we have visited every block. Therefore, the code that handles phi
+ // nodes adds them to the PHIFixups list so that they can be properly
+ // handled here.
+ for (std::vector<std::pair<PHINode *, PHINode *> >::iterator
+ i = DFSF.PHIFixups.begin(),
+ e = DFSF.PHIFixups.end();
+ i != e; ++i) {
+ for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n;
+ ++val) {
+ i->second->setIncomingValue(
+ val, DFSF.getShadow(i->first->getIncomingValue(val)));
+ }
+ }
+
+ // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
+ // places (i.e. instructions in basic blocks we haven't even begun visiting
+ // yet). To make our life easier, do this work in a pass after the main
+ // instrumentation.
+ if (ClDebugNonzeroLabels) {
+ for (DenseSet<Value *>::iterator i = DFSF.NonZeroChecks.begin(),
+ e = DFSF.NonZeroChecks.end();
+ i != e; ++i) {
+ Instruction *Pos;
+ if (Instruction *I = dyn_cast<Instruction>(*i))
+ Pos = I->getNextNode();
+ else
+ Pos = DFSF.F->getEntryBlock().begin();
+ while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
+ Pos = Pos->getNextNode();
+ IRBuilder<> IRB(Pos);
+ Instruction *NeInst = cast<Instruction>(
+ IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow));
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ NeInst, /*Unreachable=*/ false, ColdCallWeights));
+ IRBuilder<> ThenIRB(BI);
+ ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn);
+ }
+ }
+ }
+
+ return false;
+}
+
+Value *DFSanFunction::getArgTLSPtr() {
+ if (ArgTLSPtr)
+ return ArgTLSPtr;
+ if (DFS.ArgTLS)
+ return ArgTLSPtr = DFS.ArgTLS;
+
+ IRBuilder<> IRB(F->getEntryBlock().begin());
+ return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS);
+}
+
+Value *DFSanFunction::getRetvalTLS() {
+ if (RetvalTLSPtr)
+ return RetvalTLSPtr;
+ if (DFS.RetvalTLS)
+ return RetvalTLSPtr = DFS.RetvalTLS;
+
+ IRBuilder<> IRB(F->getEntryBlock().begin());
+ return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS);
+}
+
+Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateConstGEP2_64(getArgTLSPtr(), 0, Idx);
+}
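On the runtime side, the IA_TLS convention used by getArgTLSPtr/getRetvalTLS above can be pictured roughly as follows; this is a sketch only, the real declarations live in the DFSan runtime, and the 16-bit label type assumes the default ShadowWidth:

  typedef unsigned short dfsan_label;

  // Thread-local slots matching the globals created in runOnModule when no
  // custom accessors are supplied: one array slot per argument, one slot for
  // the return value.
  extern __thread dfsan_label __dfsan_arg_tls[64];
  extern __thread dfsan_label __dfsan_retval_tls;

  // Conceptually, a caller of an instrumented "int g(int a, int b)" emits:
  //   __dfsan_arg_tls[0] = label_of(a);
  //   __dfsan_arg_tls[1] = label_of(b);
  //   int r = g(a, b);                           // callee stores label_of(r)
  //   dfsan_label r_label = __dfsan_retval_tls;  // into __dfsan_retval_tls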
+
+Value *DFSanFunction::getShadow(Value *V) {
+ if (!isa<Argument>(V) && !isa<Instruction>(V))
+ return DFS.ZeroShadow;
+ Value *&Shadow = ValShadowMap[V];
+ if (!Shadow) {
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ if (IsNativeABI)
+ return DFS.ZeroShadow;
+ switch (IA) {
+ case DataFlowSanitizer::IA_TLS: {
+ Value *ArgTLSPtr = getArgTLSPtr();
+ Instruction *ArgTLSPos =
+ DFS.ArgTLS ? &*F->getEntryBlock().begin()
+ : cast<Instruction>(ArgTLSPtr)->getNextNode();
+ IRBuilder<> IRB(ArgTLSPos);
+ Shadow = IRB.CreateLoad(getArgTLS(A->getArgNo(), ArgTLSPos));
+ break;
+ }
+ case DataFlowSanitizer::IA_Args: {
+ unsigned ArgIdx = A->getArgNo() + F->getArgumentList().size() / 2;
+ Function::arg_iterator i = F->arg_begin();
+ while (ArgIdx--)
+ ++i;
+ Shadow = i;
+ assert(Shadow->getType() == DFS.ShadowTy);
+ break;
+ }
+ }
+ NonZeroChecks.insert(Shadow);
+ } else {
+ Shadow = DFS.ZeroShadow;
+ }
+ }
+ return Shadow;
+}
+
+void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
+ assert(!ValShadowMap.count(I));
+ assert(Shadow->getType() == DFS.ShadowTy);
+ ValShadowMap[I] = Shadow;
+}
+
+Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
+ assert(Addr != RetvalTLS && "Reinstrumenting?");
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateIntToPtr(
+ IRB.CreateMul(
+ IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask),
+ ShadowPtrMul),
+ ShadowPtrTy);
+}
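A worked example of the mapping above, assuming the default 16-bit shadow width (so ShadowPtrMul is 2) and the mask set in doInitialization:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint64_t ShadowPtrMask = ~0x700000000000ULL; // clears bits 44-46
    const uint64_t ShadowPtrMul  = 2;                  // ShadowWidth / 8 bytes of shadow per byte
    uint64_t App    = 0x700000001000ULL;               // sample application address
    uint64_t Shadow = (App & ShadowPtrMask) * ShadowPtrMul;
    std::printf("%#llx -> %#llx\n", (unsigned long long)App,
                (unsigned long long)Shadow);           // prints 0x700000001000 -> 0x2000
    return 0;
  }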
+
+// Generates IR to compute the union of the two given shadows, inserting it
+// before Pos. Returns the computed union Value.
+Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2,
+ Instruction *Pos) {
+ if (V1 == ZeroShadow)
+ return V2;
+ if (V2 == ZeroShadow)
+ return V1;
+ if (V1 == V2)
+ return V1;
+ IRBuilder<> IRB(Pos);
+ BasicBlock *Head = Pos->getParent();
+ Value *Ne = IRB.CreateICmpNE(V1, V2);
+ Instruction *NeInst = dyn_cast<Instruction>(Ne);
+ if (NeInst) {
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ NeInst, /*Unreachable=*/ false, ColdCallWeights));
+ IRBuilder<> ThenIRB(BI);
+ CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2);
+ Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ Call->addAttribute(1, Attribute::ZExt);
+ Call->addAttribute(2, Attribute::ZExt);
+
+ BasicBlock *Tail = BI->getSuccessor(0);
+ PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
+ Phi->addIncoming(Call, Call->getParent());
+ Phi->addIncoming(V1, Head);
+ Pos = Phi;
+ return Phi;
+ } else {
+ assert(0 && "todo");
+ return 0;
+ }
+}
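The same combining logic, lifted out of IR into a plain C++ sketch; the __dfsan_union declaration mirrors DFSanUnionFnTy above and is assumed to be provided by the runtime:

  extern "C" unsigned short __dfsan_union(unsigned short, unsigned short);

  unsigned short combineShadows(unsigned short V1, unsigned short V2) {
    if (V1 == 0)  return V2;        // the zero shadow is the identity
    if (V2 == 0)  return V1;
    if (V1 == V2) return V1;        // equal labels need no union
    return __dfsan_union(V1, V2);   // otherwise ask the runtime for a union label
  }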
+
+// A convenience function which folds the shadows of each of the operands
+// of the provided instruction Inst, inserting the IR before Inst. Returns
+// the computed union Value.
+Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
+ if (Inst->getNumOperands() == 0)
+ return DFS.ZeroShadow;
+
+ Value *Shadow = getShadow(Inst->getOperand(0));
+ for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
+ Shadow = DFS.combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
+ }
+ return Shadow;
+}
+
+void DFSanVisitor::visitOperandShadowInst(Instruction &I) {
+ Value *CombinedShadow = DFSF.combineOperandShadows(&I);
+ DFSF.setShadow(&I, CombinedShadow);
+}
+
+// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
+// Addr has alignment Align, and take the union of each of those shadows.
+Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
+ Instruction *Pos) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+ llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i =
+ AllocaShadowMap.find(AI);
+ if (i != AllocaShadowMap.end()) {
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateLoad(i->second);
+ }
+ }
+
+ uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
+ SmallVector<Value *, 2> Objs;
+ GetUnderlyingObjects(Addr, Objs, DFS.DL);
+ bool AllConstants = true;
+ for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end();
+ i != e; ++i) {
+ if (isa<Function>(*i) || isa<BlockAddress>(*i))
+ continue;
+ if (isa<GlobalVariable>(*i) && cast<GlobalVariable>(*i)->isConstant())
+ continue;
+
+ AllConstants = false;
+ break;
+ }
+ if (AllConstants)
+ return DFS.ZeroShadow;
+
+ Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+ switch (Size) {
+ case 0:
+ return DFS.ZeroShadow;
+ case 1: {
+ LoadInst *LI = new LoadInst(ShadowAddr, "", Pos);
+ LI->setAlignment(ShadowAlign);
+ return LI;
+ }
+ case 2: {
+ IRBuilder<> IRB(Pos);
+ Value *ShadowAddr1 =
+ IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ return DFS.combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
+ IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign),
+ Pos);
+ }
+ }
+ if (Size % (64 / DFS.ShadowWidth) == 0) {
+    // Fast path for the common case where each byte has identical shadow: load
+    // the shadow 64 bits at a time, falling back to a __dfsan_union_load call
+    // if any of the shadows differ.
+ BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ IRBuilder<> FallbackIRB(FallbackBB);
+ CallInst *FallbackCall = FallbackIRB.CreateCall2(
+ DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+
+ // Compare each of the shadows stored in the loaded 64 bits to each other,
+ // by computing (WideShadow rotl ShadowWidth) == WideShadow.
+ IRBuilder<> IRB(Pos);
+ Value *WideAddr =
+ IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
+ Value *WideShadow = IRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
+ Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
+ Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
+ Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
+ Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
+
+ BasicBlock *Head = Pos->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(Pos);
+ // In the following code LastBr will refer to the previous basic block's
+ // conditional branch instruction, whose true successor is fixed up to point
+ // to the next block during the loop below or to the tail after the final
+ // iteration.
+ BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
+ ReplaceInstWithInst(Head->getTerminator(), LastBr);
+
+ for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size;
+ Ofs += 64 / DFS.ShadowWidth) {
+ BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ IRBuilder<> NextIRB(NextBB);
+ WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
+ LastBr->setSuccessor(0, NextBB);
+ LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
+ }
+
+ LastBr->setSuccessor(0, Tail);
+ FallbackIRB.CreateBr(Tail);
+ PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
+ Shadow->addIncoming(FallbackCall, FallbackBB);
+ Shadow->addIncoming(TruncShadow, LastBr->getParent());
+ return Shadow;
+ }
+
+ IRBuilder<> IRB(Pos);
+ CallInst *FallbackCall = IRB.CreateCall2(
+ DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ return FallbackCall;
+}
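A standalone check of the rotate trick used in the fast path above: a 64-bit word made of identical 16-bit shadows is unchanged by a 16-bit rotate, while any differing shadow breaks the equality (assumes the default ShadowWidth of 16):

  #include <cassert>
  #include <cstdint>

  static uint64_t rotl64(uint64_t V, unsigned N) {
    return (V << N) | (V >> (64 - N));     // same shape as the Shl/LShr/Or above
  }

  int main() {
    uint64_t Same  = 0x000a000a000a000aULL;  // four copies of label 0x000a
    uint64_t Mixed = 0x000a000a000b000aULL;  // one 2-byte slot holds label 0x000b
    assert(rotl64(Same, 16) == Same);        // fast path: all shadows equal
    assert(rotl64(Mixed, 16) != Mixed);      // falls back to __dfsan_union_load
    return 0;
  }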
+
+void DFSanVisitor::visitLoadInst(LoadInst &LI) {
+ uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType());
+ uint64_t Align;
+ if (ClPreserveAlignment) {
+ Align = LI.getAlignment();
+ if (Align == 0)
+ Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType());
+ } else {
+ Align = 1;
+ }
+ IRBuilder<> IRB(&LI);
+ Value *LoadedShadow =
+ DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
+ Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
+ Value *CombinedShadow = DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI);
+ if (CombinedShadow != DFSF.DFS.ZeroShadow)
+ DFSF.NonZeroChecks.insert(CombinedShadow);
+
+ DFSF.setShadow(&LI, CombinedShadow);
+}
+
+void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
+ Value *Shadow, Instruction *Pos) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+ llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i =
+ AllocaShadowMap.find(AI);
+ if (i != AllocaShadowMap.end()) {
+ IRBuilder<> IRB(Pos);
+ IRB.CreateStore(Shadow, i->second);
+ return;
+ }
+ }
+
+ uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
+ IRBuilder<> IRB(Pos);
+ Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+ if (Shadow == DFS.ZeroShadow) {
+ IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth);
+ Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
+ Value *ExtShadowAddr =
+ IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
+ IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
+ return;
+ }
+
+ const unsigned ShadowVecSize = 128 / DFS.ShadowWidth;
+ uint64_t Offset = 0;
+ if (Size >= ShadowVecSize) {
+ VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize);
+ Value *ShadowVec = UndefValue::get(ShadowVecTy);
+ for (unsigned i = 0; i != ShadowVecSize; ++i) {
+ ShadowVec = IRB.CreateInsertElement(
+ ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
+ }
+ Value *ShadowVecAddr =
+ IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
+ do {
+ Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset);
+ IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
+ Size -= ShadowVecSize;
+ ++Offset;
+ } while (Size >= ShadowVecSize);
+ Offset *= ShadowVecSize;
+ }
+ while (Size > 0) {
+ Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset);
+ IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
+ --Size;
+ ++Offset;
+ }
+}
+
+void DFSanVisitor::visitStoreInst(StoreInst &SI) {
+ uint64_t Size =
+ DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType());
+ uint64_t Align;
+ if (ClPreserveAlignment) {
+ Align = SI.getAlignment();
+ if (Align == 0)
+ Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType());
+ } else {
+ Align = 1;
+ }
+ DFSF.storeShadow(SI.getPointerOperand(), Size, Align,
+ DFSF.getShadow(SI.getValueOperand()), &SI);
+}
+
+void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
+ visitOperandShadowInst(BO);
+}
+
+void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ visitOperandShadowInst(GEPI);
+}
+
+void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
+ bool AllLoadsStores = true;
+ for (Instruction::use_iterator i = I.use_begin(), e = I.use_end(); i != e;
+ ++i) {
+ if (isa<LoadInst>(*i))
+ continue;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(*i)) {
+ if (SI->getPointerOperand() == &I)
+ continue;
+ }
+
+ AllLoadsStores = false;
+ break;
+ }
+ if (AllLoadsStores) {
+ IRBuilder<> IRB(&I);
+ DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy);
+ }
+ DFSF.setShadow(&I, DFSF.DFS.ZeroShadow);
+}
+
+void DFSanVisitor::visitSelectInst(SelectInst &I) {
+ Value *CondShadow = DFSF.getShadow(I.getCondition());
+ Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
+ Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
+
+ if (isa<VectorType>(I.getCondition()->getType())) {
+ DFSF.setShadow(
+ &I, DFSF.DFS.combineShadows(
+ CondShadow,
+ DFSF.DFS.combineShadows(TrueShadow, FalseShadow, &I), &I));
+ } else {
+ Value *ShadowSel;
+ if (TrueShadow == FalseShadow) {
+ ShadowSel = TrueShadow;
+ } else {
+ ShadowSel =
+ SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
+ }
+ DFSF.setShadow(&I, DFSF.DFS.combineShadows(CondShadow, ShadowSel, &I));
+ }
+}
+
+void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *ValShadow = DFSF.getShadow(I.getValue());
+ IRB.CreateCall3(
+ DFSF.DFS.DFSanSetLabelFn, ValShadow,
+ IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
+ IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy));
+}
+
+void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
+ Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
+ Value *LenShadow = IRB.CreateMul(
+ I.getLength(),
+ ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8));
+ Value *AlignShadow;
+ if (ClPreserveAlignment) {
+ AlignShadow = IRB.CreateMul(I.getAlignmentCst(),
+ ConstantInt::get(I.getAlignmentCst()->getType(),
+ DFSF.DFS.ShadowWidth / 8));
+ } else {
+ AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(),
+ DFSF.DFS.ShadowWidth / 8);
+ }
+ Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
+ DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
+ SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
+ IRB.CreateCall5(I.getCalledValue(), DestShadow, SrcShadow, LenShadow,
+ AlignShadow, I.getVolatileCst());
+}
+
+void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
+ if (!DFSF.IsNativeABI && RI.getReturnValue()) {
+ switch (DFSF.IA) {
+ case DataFlowSanitizer::IA_TLS: {
+ Value *S = DFSF.getShadow(RI.getReturnValue());
+ IRBuilder<> IRB(&RI);
+ IRB.CreateStore(S, DFSF.getRetvalTLS());
+ break;
+ }
+ case DataFlowSanitizer::IA_Args: {
+ IRBuilder<> IRB(&RI);
+ Type *RT = DFSF.F->getFunctionType()->getReturnType();
+ Value *InsVal =
+ IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
+ Value *InsShadow =
+ IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
+ RI.setOperand(0, InsShadow);
+ break;
+ }
+ }
+ }
+}
+
+void DFSanVisitor::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ if ((F && F->isIntrinsic()) || isa<InlineAsm>(CS.getCalledValue())) {
+ visitOperandShadowInst(*CS.getInstruction());
+ return;
+ }
+
+ IRBuilder<> IRB(CS.getInstruction());
+
+ DenseMap<Value *, Function *>::iterator i =
+ DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue());
+ if (i != DFSF.DFS.UnwrappedFnMap.end()) {
+ Function *F = i->second;
+ switch (DFSF.DFS.getWrapperKind(F)) {
+ case DataFlowSanitizer::WK_Warning: {
+ CS.setCalledFunction(F);
+ IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
+ IRB.CreateGlobalStringPtr(F->getName()));
+ DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
+ return;
+ }
+ case DataFlowSanitizer::WK_Discard: {
+ CS.setCalledFunction(F);
+ DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
+ return;
+ }
+ case DataFlowSanitizer::WK_Functional: {
+ CS.setCalledFunction(F);
+ visitOperandShadowInst(*CS.getInstruction());
+ return;
+ }
+ case DataFlowSanitizer::WK_Custom: {
+      // Don't try to handle invokes of custom functions; it's too complicated.
+ // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
+ // wrapper.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ FunctionType *FT = F->getFunctionType();
+ FunctionType *CustomFT = DFSF.DFS.getCustomFunctionType(FT);
+ std::string CustomFName = "__dfsw_";
+ CustomFName += F->getName();
+ Constant *CustomF =
+ DFSF.DFS.Mod->getOrInsertFunction(CustomFName, CustomFT);
+ if (Function *CustomFn = dyn_cast<Function>(CustomF)) {
+ CustomFn->copyAttributesFrom(F);
+
+ // Custom functions returning non-void will write to the return label.
+ if (!FT->getReturnType()->isVoidTy()) {
+ CustomFn->removeAttributes(AttributeSet::FunctionIndex,
+ DFSF.DFS.ReadOnlyNoneAttrs);
+ }
+ }
+
+ std::vector<Value *> Args;
+
+ CallSite::arg_iterator i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
+ Type *T = (*i)->getType();
+ FunctionType *ParamFT;
+ if (isa<PointerType>(T) &&
+ (ParamFT = dyn_cast<FunctionType>(
+ cast<PointerType>(T)->getElementType()))) {
+ std::string TName = "dfst";
+ TName += utostr(FT->getNumParams() - n);
+ TName += "$";
+ TName += F->getName();
+ Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
+ Args.push_back(T);
+ Args.push_back(
+ IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
+ } else {
+ Args.push_back(*i);
+ }
+ }
+
+ i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(DFSF.getShadow(*i));
+
+ if (!FT->getReturnType()->isVoidTy()) {
+ if (!DFSF.LabelReturnAlloca) {
+ DFSF.LabelReturnAlloca =
+ new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn",
+ DFSF.F->getEntryBlock().begin());
+ }
+ Args.push_back(DFSF.LabelReturnAlloca);
+ }
+
+ CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
+ CustomCI->setCallingConv(CI->getCallingConv());
+ CustomCI->setAttributes(CI->getAttributes());
+
+ if (!FT->getReturnType()->isVoidTy()) {
+ LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca);
+ DFSF.setShadow(CustomCI, LabelLoad);
+ }
+
+ CI->replaceAllUsesWith(CustomCI);
+ CI->eraseFromParent();
+ return;
+ }
+ break;
+ }
+ }
+ }
+
+ FunctionType *FT = cast<FunctionType>(
+ CS.getCalledValue()->getType()->getPointerElementType());
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+ for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) {
+ IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)),
+ DFSF.getArgTLS(i, CS.getInstruction()));
+ }
+ }
+
+ Instruction *Next = 0;
+ if (!CS.getType()->isVoidTy()) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ if (II->getNormalDest()->getSinglePredecessor()) {
+ Next = II->getNormalDest()->begin();
+ } else {
+ BasicBlock *NewBB =
+ SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DFS);
+ Next = NewBB->begin();
+ }
+ } else {
+ Next = CS->getNextNode();
+ }
+
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+ IRBuilder<> NextIRB(Next);
+ LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS());
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setShadow(CS.getInstruction(), LI);
+ DFSF.NonZeroChecks.insert(LI);
+ }
+ }
+
+ // Do all instrumentation for IA_Args down here to defer tampering with the
+ // CFG in a way that SplitEdge may be able to detect.
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
+ FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
+ Value *Func =
+ IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT));
+ std::vector<Value *> Args;
+
+ CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(*i);
+
+ i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(DFSF.getShadow(*i));
+
+ if (FT->isVarArg()) {
+ unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
+ ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
+ AllocaInst *VarArgShadow =
+ new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin());
+ Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0));
+ for (unsigned n = 0; i != e; ++i, ++n) {
+ IRB.CreateStore(DFSF.getShadow(*i),
+ IRB.CreateConstGEP2_32(VarArgShadow, 0, n));
+ Args.push_back(*i);
+ }
+ }
+
+ CallSite NewCS;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ NewCS = IRB.CreateInvoke(Func, II->getNormalDest(), II->getUnwindDest(),
+ Args);
+ } else {
+ NewCS = IRB.CreateCall(Func, Args);
+ }
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(CS.getAttributes().removeAttributes(
+ *DFSF.DFS.Ctx, AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType(),
+ AttributeSet::ReturnIndex)));
+
+ if (Next) {
+ ExtractValueInst *ExVal =
+ ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next);
+ DFSF.SkipInsts.insert(ExVal);
+ ExtractValueInst *ExShadow =
+ ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next);
+ DFSF.SkipInsts.insert(ExShadow);
+ DFSF.setShadow(ExVal, ExShadow);
+ DFSF.NonZeroChecks.insert(ExShadow);
+
+ CS.getInstruction()->replaceAllUsesWith(ExVal);
+ }
+
+ CS.getInstruction()->eraseFromParent();
+ }
+}
+
+void DFSanVisitor::visitPHINode(PHINode &PN) {
+ PHINode *ShadowPN =
+ PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN);
+
+ // Give the shadow phi node valid predecessors to fool SplitEdge into working.
+ Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy);
+ for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
+ ++i) {
+ ShadowPN->addIncoming(UndefShadow, *i);
+ }
+
+ DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
+ DFSF.setShadow(&PN, ShadowPN);
+}
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
index 651381d..f50a044 100644
--- a/lib/Transforms/Instrumentation/DebugIR.cpp
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -25,6 +25,7 @@
#include "llvm/InstVisitor.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -401,7 +402,7 @@ private:
Type *PointeeTy = T->getPointerElementType();
if (!(N = getType(PointeeTy)))
N = Builder.createPointerType(
- getOrCreateType(PointeeTy), Layout.getPointerSizeInBits(),
+ getOrCreateType(PointeeTy), Layout.getPointerTypeSizeInBits(T),
Layout.getPrefTypeAlignment(T), getTypeName(T));
} else if (T->isArrayTy()) {
SmallVector<Value *, 1> Subrange;
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
deleted file mode 100644
index a2459fb..0000000
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments the specified program with counters for edge profiling.
-// Edge profiling can give a reasonable approximation of the hot paths through a
-// program, and is used for a wide variety of program transformations.
-//
-// Note that this implementation is very naive. We insert a counter for *every*
-// edge in the program, instead of using control flow information to prune the
-// number of counters inserted.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-edge-profiling"
-
-#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumEdgesInserted, "The # of edges inserted.");
-
-namespace {
- class EdgeProfiler : public ModulePass {
- bool runOnModule(Module &M);
- public:
- static char ID; // Pass identification, replacement for typeid
- EdgeProfiler() : ModulePass(ID) {
- initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- virtual const char *getPassName() const {
- return "Edge Profiler";
- }
- };
-}
-
-char EdgeProfiler::ID = 0;
-INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
- "Insert instrumentation for edge profiling", false, false)
-
-ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
-
-bool EdgeProfiler::runOnModule(Module &M) {
- Function *Main = M.getFunction("main");
- if (Main == 0) {
- errs() << "WARNING: cannot insert edge profiling into a module"
- << " with no main function!\n";
- return false; // No main, no instrumentation!
- }
-
- std::set<BasicBlock*> BlocksToInstrument;
- unsigned NumEdges = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Reserve space for (0,entry) edge.
- ++NumEdges;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Keep track of which blocks need to be instrumented. We don't want to
- // instrument blocks that are added as the result of breaking critical
- // edges!
- BlocksToInstrument.insert(BB);
- NumEdges += BB->getTerminator()->getNumSuccessors();
- }
- }
-
- Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
- GlobalVariable *Counters =
- new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "EdgeProfCounters");
- NumEdgesInserted = NumEdges;
-
- // Instrument all of the edges...
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Create counter for (0,entry) edge.
- IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
- // Okay, we have to add a counter of each outgoing edge. If the
- // outgoing edge is not critical don't split it, just insert the counter
- // in the source or destination of the edge.
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- // If the edge is critical, split it.
- SplitCriticalEdge(TI, s, this);
-
- // Okay, we are guaranteed that the edge is no longer critical. If we
- // only have a single successor, insert the counter in this block,
- // otherwise insert it in the successor block.
- if (TI->getNumSuccessors() == 1) {
- // Insert counter at the start of the block
- IncrementCounterInBlock(BB, i++, Counters, false);
- } else {
- // Insert counter at the start of the block
- IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
- }
- }
- }
- }
-
- // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
- return true;
-}
-
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 4c2681f..206bffb 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -17,7 +17,6 @@
#define DEBUG_TYPE "insert-gcov-profiling"
#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -103,6 +102,7 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getSummaryInfoFunc();
Constant *getDeleteWriteoutFunctionListFunc();
Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
@@ -519,15 +519,15 @@ bool GCOVProfiler::emitProfileArcs() {
TerminatorInst *TI = BB->getTerminator();
int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
if (Successors) {
- IRBuilder<> Builder(TI);
-
if (Successors == 1) {
+ IRBuilder<> Builder(BB->getFirstInsertionPt());
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge);
Value *Count = Builder.CreateLoad(Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ IRBuilder<> Builder(BI);
Value *Sel = Builder.CreateSelect(BI->getCondition(),
Builder.getInt64(Edge),
Builder.getInt64(Edge + 1));
@@ -543,6 +543,7 @@ bool GCOVProfiler::emitProfileArcs() {
for (int i = 0; i != Successors; ++i)
ComplexEdgeSuccs.insert(TI->getSuccessor(i));
}
+
Edge += Successors;
}
}
@@ -554,14 +555,13 @@ bool GCOVProfiler::emitProfileArcs() {
GlobalVariable *EdgeState = getEdgeStateValue();
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+ IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt());
Builder.CreateStore(Builder.getInt32(i), EdgeState);
}
+
for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
- // call runtime to perform increment
- BasicBlock::iterator InsertPt =
- ComplexEdgeSuccs[i+1]->getFirstInsertionPt();
- IRBuilder<> Builder(InsertPt);
+ // Call runtime to perform increment.
+ IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt());
Value *CounterPtrArray =
Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
i * ComplexEdgePreds.size());
@@ -599,7 +599,7 @@ bool GCOVProfiler::emitProfileArcs() {
};
FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
- // Inialize the environment and register the local writeout and flush
+ // Initialize the environment and register the local writeout and flush
// functions.
Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
@@ -701,6 +701,11 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getSummaryInfoFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
+}
+
Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
@@ -747,6 +752,7 @@ Function *GCOVProfiler::insertCounterWriteout(
Constant *StartFile = getStartFileFunc();
Constant *EmitFunction = getEmitFunctionFunc();
Constant *EmitArcs = getEmitArcsFunc();
+ Constant *SummaryInfo = getSummaryInfoFunc();
Constant *EndFile = getEndFileFunc();
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -773,6 +779,7 @@ Function *GCOVProfiler::insertCounterWriteout(
Builder.getInt32(Arcs),
Builder.CreateConstGEP2_64(GV, 0, 0));
}
+ Builder.CreateCall(SummaryInfo);
Builder.CreateCall(EndFile);
}
}
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 9f35396..b1bea38 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -24,12 +24,10 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerPass(Registry);
initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingPass(Registry);
- initializeEdgeProfilerPass(Registry);
initializeGCOVProfilerPass(Registry);
- initializeOptimalEdgeProfilerPass(Registry);
- initializePathProfilerPass(Registry);
initializeMemorySanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
+ initializeDataFlowSanitizerPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 0251f16..d547adc 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -66,6 +66,31 @@
/// avoids storing origin to memory when a fully initialized value is stored.
 /// This way it avoids needlessly overwriting the origin of the 4-byte region on
/// a short (i.e. 1 byte) clean store, and it is also good for performance.
+///
+/// Atomic handling.
+///
+/// Ideally, every atomic store of an application value should update the
+/// corresponding shadow location in an atomic way. Unfortunately, atomically
+/// storing to two disjoint locations cannot be done without a severe slowdown.
+///
+/// Therefore, we implement an approximation that may err on the safe side.
+/// In this implementation, every atomically accessed location in the program
+/// may only change from (partially) uninitialized to fully initialized, but
+/// not the other way around. We load the shadow _after_ the application load,
+/// and we store the shadow _before_ the app store. Also, we always store clean
+/// shadow (if the application store is atomic). This way, if the store-load
+/// pair constitutes a happens-before arc, shadow store and load are correctly
+/// ordered such that the load will get either the value that was stored, or
+/// some later value (which is always clean).
+///
+/// This does not work very well with Compare-And-Swap (CAS) and
+/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
+/// must store the new shadow before the app operation, and load the shadow
+/// after the app operation. Computers don't work this way. The current
+/// implementation ignores the load aspect of CAS/RMW and always returns a
+/// clean value. It implements the store part as a simple atomic store of a
+/// clean shadow.
+
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "msan"
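The ordering argument in the atomic-handling comment above can be modelled with plain std::atomic code; this is a minimal sketch of the claimed invariant, not the instrumentation MSan actually emits:

  #include <atomic>
  #include <cassert>
  #include <thread>

  std::atomic<int> App{0};
  std::atomic<int> AppShadow{1};  // 1 = poisoned, 0 = clean; only ever moves toward clean

  void writer() {
    AppShadow.store(0, std::memory_order_relaxed);  // clean shadow stored *before*...
    App.store(42, std::memory_order_release);       // ...the release store of the value
  }

  void reader() {
    int V = App.load(std::memory_order_acquire);        // application value first
    int S = AppShadow.load(std::memory_order_relaxed);  // its shadow afterwards
    if (V == 42)
      assert(S == 0);  // the happens-before arc makes the clean shadow visible
  }

  int main() {
    std::thread T1(writer), T2(reader);
    T1.join();
    T2.join();
    return 0;
  }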
@@ -157,6 +182,18 @@ static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
cl::desc("File containing the list of functions where MemorySanitizer "
"should not report bugs"), cl::Hidden);
+// Experimental. Wraps all indirect calls in the instrumented code with
+// a call to the given function. This is needed to assist the dynamic
+// helper tool (MSanDR) to regain control on transition between instrumented and
+// non-instrumented code.
+static cl::opt<std::string> ClWrapIndirectCalls("msan-wrap-indirect-calls",
+ cl::desc("Wrap indirect calls with a given function"),
+ cl::Hidden);
+
+static cl::opt<bool> ClWrapIndirectCallsFast("msan-wrap-indirect-calls-fast",
+ cl::desc("Do not wrap indirect calls with target in the same module"),
+ cl::Hidden, cl::init(true));
+
namespace {
/// \brief An instrumentation pass implementing detection of uninitialized
@@ -168,12 +205,12 @@ class MemorySanitizer : public FunctionPass {
public:
MemorySanitizer(bool TrackOrigins = false,
StringRef BlacklistFile = StringRef())
- : FunctionPass(ID),
- TrackOrigins(TrackOrigins || ClTrackOrigins),
- TD(0),
- WarningFn(0),
- BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile) { }
+ : FunctionPass(ID),
+ TrackOrigins(TrackOrigins || ClTrackOrigins),
+ TD(0),
+ WarningFn(0),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile),
+ WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {}
const char *getPassName() const { return "MemorySanitizer"; }
bool runOnFunction(Function &F);
bool doInitialization(Module &M);
@@ -207,13 +244,16 @@ class MemorySanitizer : public FunctionPass {
/// function.
GlobalVariable *OriginTLS;
+ GlobalVariable *MsandrModuleStart;
+ GlobalVariable *MsandrModuleEnd;
+
/// \brief The run-time callback to print a warning.
Value *WarningFn;
/// \brief Run-time helper that copies origin info for a memory range.
Value *MsanCopyOriginFn;
/// \brief Run-time helper that generates a new origin value for a stack
/// allocation.
- Value *MsanSetAllocaOriginFn;
+ Value *MsanSetAllocaOrigin4Fn;
/// \brief Run-time helper that poisons stack on function entry.
Value *MsanPoisonStackFn;
/// \brief MSan runtime replacements for memmove, memcpy and memset.
@@ -236,6 +276,12 @@ class MemorySanitizer : public FunctionPass {
/// \brief An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+ bool WrapIndirectCalls;
+ /// \brief Run-time wrapper for indirect calls.
+ Value *IndirectCallWrapperFn;
+ // Argument and return type of IndirectCallWrapperFn: void (*f)(void).
+ Type *AnyFunctionPtrTy;
+
friend struct MemorySanitizerVisitor;
friend struct VarArgAMD64Helper;
};
@@ -281,9 +327,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
MsanCopyOriginFn = M.getOrInsertFunction(
"__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy, NULL);
- MsanSetAllocaOriginFn = M.getOrInsertFunction(
- "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
- IRB.getInt8PtrTy(), NULL);
+ MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
+ "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
+ IRB.getInt8PtrTy(), IntptrTy, NULL);
MsanPoisonStackFn = M.getOrInsertFunction(
"__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
MemmoveFn = M.getOrInsertFunction(
@@ -329,6 +375,24 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
+
+ if (WrapIndirectCalls) {
+ AnyFunctionPtrTy =
+ PointerType::getUnqual(FunctionType::get(IRB.getVoidTy(), false));
+ IndirectCallWrapperFn = M.getOrInsertFunction(
+ ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL);
+ }
+
+ if (ClWrapIndirectCallsFast) {
+ MsandrModuleStart = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
+ 0, "__executable_start");
+ MsandrModuleStart->setVisibility(GlobalVariable::HiddenVisibility);
+ MsandrModuleEnd = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
+ 0, "_end");
+ MsandrModuleEnd->setVisibility(GlobalVariable::HiddenVisibility);
+ }
}
/// \brief Module-level initialization.
@@ -338,7 +402,7 @@ bool MemorySanitizer::doInitialization(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new SpecialCaseList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
C = &(M.getContext());
unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
switch (PtrSize) {
@@ -423,22 +487,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MemorySanitizer &MS;
SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
ValueMap<Value*, Value*> ShadowMap, OriginMap;
+ OwningPtr<VarArgHelper> VAHelper;
+
+ // The following flags disable parts of MSan instrumentation based on
+ // blacklist contents and command-line options.
bool InsertChecks;
bool LoadShadow;
bool PoisonStack;
bool PoisonUndef;
- OwningPtr<VarArgHelper> VAHelper;
+ bool CheckReturnValue;
struct ShadowOriginAndInsertPoint {
- Instruction *Shadow;
- Instruction *Origin;
+ Value *Shadow;
+ Value *Origin;
Instruction *OrigIns;
- ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
+ ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
: Shadow(S), Origin(O), OrigIns(I) { }
ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
};
SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
SmallVector<Instruction*, 16> StoreList;
+ SmallVector<CallSite, 16> IndirectCallList;
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
: F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
@@ -449,6 +518,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
LoadShadow = SanitizeFunction;
PoisonStack = SanitizeFunction && ClPoisonStack;
PoisonUndef = SanitizeFunction && ClPoisonUndef;
+ // FIXME: Consider using SpecialCaseList to specify a list of functions that
+ // must always return fully initialized values. For now, we hardcode "main".
+ CheckReturnValue = SanitizeFunction && (F.getName() == "main");
DEBUG(if (!InsertChecks)
dbgs() << "MemorySanitizer is not inserting checks into '"
@@ -462,7 +534,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Value *Val = I.getValueOperand();
Value *Addr = I.getPointerOperand();
- Value *Shadow = getShadow(Val);
+ Value *Shadow = I.isAtomic() ? getCleanShadow(Val) : getShadow(Val);
Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
StoreInst *NewSI =
@@ -471,7 +543,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
(void)NewSI;
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
+
+ if (I.isAtomic())
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
@@ -481,11 +556,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- Constant *Cst = dyn_cast_or_null<Constant>(ConvertedShadow);
// TODO(eugenis): handle non-zero constant shadow by inserting an
// unconditional check (can not simply fail compilation as this could
// be in the dead code).
- if (Cst)
+ if (isa<Constant>(ConvertedShadow))
continue;
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
@@ -503,12 +577,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void materializeChecks() {
for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
- Instruction *Shadow = InstrumentationList[i].Shadow;
+ Value *Shadow = InstrumentationList[i].Shadow;
Instruction *OrigIns = InstrumentationList[i].OrigIns;
IRBuilder<> IRB(OrigIns);
DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
+ // See the comment in materializeStores().
+ if (isa<Constant>(ConvertedShadow))
+ continue;
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm =
@@ -518,7 +595,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.SetInsertPoint(CheckTerm);
if (MS.TrackOrigins) {
- Instruction *Origin = InstrumentationList[i].Origin;
+ Value *Origin = InstrumentationList[i].Origin;
IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
MS.OriginTLS);
}
@@ -530,6 +607,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << "DONE:\n" << F);
}
+ void materializeIndirectCalls() {
+ for (size_t i = 0, n = IndirectCallList.size(); i < n; i++) {
+ CallSite CS = IndirectCallList[i];
+ Instruction *I = CS.getInstruction();
+ BasicBlock *B = I->getParent();
+ IRBuilder<> IRB(I);
+ Value *Fn0 = CS.getCalledValue();
+ Value *Fn = IRB.CreateBitCast(Fn0, MS.AnyFunctionPtrTy);
+
+ if (ClWrapIndirectCallsFast) {
+ // Check that the call target is inside this module's limits.
+ Value *Start =
+ IRB.CreateBitCast(MS.MsandrModuleStart, MS.AnyFunctionPtrTy);
+ Value *End = IRB.CreateBitCast(MS.MsandrModuleEnd, MS.AnyFunctionPtrTy);
+
+ Value *NotInThisModule = IRB.CreateOr(IRB.CreateICmpULT(Fn, Start),
+ IRB.CreateICmpUGE(Fn, End));
+
+ PHINode *NewFnPhi =
+ IRB.CreatePHI(Fn0->getType(), 2, "msandr.indirect_target");
+
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ cast<Instruction>(NotInThisModule),
+ /* Unreachable */ false, MS.ColdCallWeights);
+
+ IRB.SetInsertPoint(CheckTerm);
+ // Slow path: call wrapper function to possibly transform the call
+ // target.
+ Value *NewFn = IRB.CreateBitCast(
+ IRB.CreateCall(MS.IndirectCallWrapperFn, Fn), Fn0->getType());
+
+ NewFnPhi->addIncoming(Fn0, B);
+ NewFnPhi->addIncoming(NewFn, dyn_cast<Instruction>(NewFn)->getParent());
+ CS.setCalledFunction(NewFnPhi);
+ } else {
+ Value *NewFn = IRB.CreateBitCast(
+ IRB.CreateCall(MS.IndirectCallWrapperFn, Fn), Fn0->getType());
+ CS.setCalledFunction(NewFn);
+ }
+ }
+ }
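The fast path above only diverts calls whose target lies outside the current module's [__executable_start, _end) range. As a rough standalone illustration (not the pass itself), the same range test can be written directly in C++; this assumes a Linux/ELF link where the linker provides those two symbols, and the wrapper below is a hypothetical stand-in for the msandr-style redirection routine:

#include <cstdint>
#include <cstdio>

extern char __executable_start; // provided by the linker on ELF targets
extern char _end;               // provided by the linker on ELF targets

typedef void (*AnyFn)();

static AnyFn wrap_indirect_call(AnyFn Target) {
  // Hypothetical slow path: a real runtime could map Target to an
  // instrumented replacement here. This sketch just passes it through.
  return Target;
}

static void call_indirect(AnyFn Target) {
  uintptr_t P = reinterpret_cast<uintptr_t>(Target);
  uintptr_t Start = reinterpret_cast<uintptr_t>(&__executable_start);
  uintptr_t End = reinterpret_cast<uintptr_t>(&_end);
  bool InThisModule = P >= Start && P < End;
  // Out-of-module targets go through the wrapper; local ones are called as-is.
  AnyFn Callee = InThisModule ? Target : wrap_indirect_call(Target);
  Callee();
}

static void hello() { std::puts("hello from an in-module target"); }

int main() {
  call_indirect(&hello);
  return 0;
}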
+
/// \brief Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
MS.initializeCallbacks(*F.getParent());
@@ -572,6 +691,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Insert shadow value checks.
materializeChecks();
+ // Wrap indirect calls.
+ materializeIndirectCalls();
+
return true;
}
@@ -835,20 +957,63 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Remember the place where a shadow check should be inserted.
///
/// This location will be later instrumented with a check that will print a
- /// UMR warning in runtime if the value is not fully defined.
- void insertCheck(Value *Val, Instruction *OrigIns) {
- assert(Val);
+ /// UMR warning at runtime if the shadow value is not 0.
+ void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
+ assert(Shadow);
if (!InsertChecks) return;
- Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
- if (!Shadow) return;
#ifndef NDEBUG
Type *ShadowTy = Shadow->getType();
assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
"Can only insert checks for integer and vector shadow types");
#endif
- Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
InstrumentationList.push_back(
- ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ }
+
+ /// \brief Remember the place where a shadow check should be inserted.
+ ///
+ /// This location will be later instrumented with a check that will print a
+ /// UMR warning at runtime if the value is not fully defined.
+ void insertShadowCheck(Value *Val, Instruction *OrigIns) {
+ assert(Val);
+ Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
+ if (!Shadow) return;
+ Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+ insertShadowCheck(Shadow, Origin, OrigIns);
+ }
+
+ AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
+ switch (a) {
+ case NotAtomic:
+ return NotAtomic;
+ case Unordered:
+ case Monotonic:
+ case Release:
+ return Release;
+ case Acquire:
+ case AcquireRelease:
+ return AcquireRelease;
+ case SequentiallyConsistent:
+ return SequentiallyConsistent;
+ }
+ llvm_unreachable("Unknown ordering");
+ }
+
+ AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
+ switch (a) {
+ case NotAtomic:
+ return NotAtomic;
+ case Unordered:
+ case Monotonic:
+ case Acquire:
+ return Acquire;
+ case Release:
+ case AcquireRelease:
+ return AcquireRelease;
+ case SequentiallyConsistent:
+ return SequentiallyConsistent;
+ }
+ llvm_unreachable("Unknown ordering");
}
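The two helpers above only ever strengthen an ordering: stores end up at least release and loads at least acquire, so the extra shadow accesses MSan emits around an atomic stay correctly ordered with it. A minimal sketch of the same mapping, using std::memory_order as a stand-in for llvm::AtomicOrdering (an assumption made purely for illustration; memory_order has no NotAtomic/Unordered states):

#include <atomic>
#include <cassert>

// Strengthen an ordering so it is at least a release operation.
static std::memory_order addRelease(std::memory_order O) {
  switch (O) {
  case std::memory_order_relaxed:
  case std::memory_order_release:
    return std::memory_order_release;
  case std::memory_order_consume:
  case std::memory_order_acquire:
  case std::memory_order_acq_rel:
    return std::memory_order_acq_rel;
  default:
    return std::memory_order_seq_cst;
  }
}

// Strengthen an ordering so it is at least an acquire operation.
static std::memory_order addAcquire(std::memory_order O) {
  switch (O) {
  case std::memory_order_relaxed:
  case std::memory_order_consume:
  case std::memory_order_acquire:
    return std::memory_order_acquire;
  case std::memory_order_release:
  case std::memory_order_acq_rel:
    return std::memory_order_acq_rel;
  default:
    return std::memory_order_seq_cst;
  }
}

int main() {
  assert(addRelease(std::memory_order_relaxed) == std::memory_order_release);
  assert(addAcquire(std::memory_order_release) == std::memory_order_acq_rel);
  return 0;
}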
// ------------------- Visitors.
@@ -859,7 +1024,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Optionally, checks that the load address is fully defined.
void visitLoadInst(LoadInst &I) {
assert(I.getType()->isSized() && "Load type must have size");
- IRBuilder<> IRB(&I);
+ IRBuilder<> IRB(I.getNextNode());
Type *ShadowTy = getShadowTy(&I);
Value *Addr = I.getPointerOperand();
if (LoadShadow) {
@@ -871,7 +1036,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
if (ClCheckAccessAddress)
- insertCheck(I.getPointerOperand(), &I);
+ insertShadowCheck(I.getPointerOperand(), &I);
+
+ if (I.isAtomic())
+ I.setOrdering(addAcquireOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
if (LoadShadow) {
@@ -892,9 +1060,40 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
StoreList.push_back(&I);
}
+ void handleCASOrRMW(Instruction &I) {
+ assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
+
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getOperand(0);
+ Value *ShadowPtr = getShadowPtr(Addr, I.getType(), IRB);
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+
+ // Only check the compare argument of the cmpxchg instruction. The other
+ // argument can potentially be uninitialized, but we cannot detect this
+ // situation reliably without risking false positives.
+ if (isa<AtomicCmpXchgInst>(I))
+ insertShadowCheck(I.getOperand(1), &I);
+
+ IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
+
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &I) {
+ handleCASOrRMW(I);
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ }
+
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+ handleCASOrRMW(I);
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ }
+
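handleCASOrRMW encodes a deliberately conservative shadow rule for atomics. A plain-C++ model of the cmpxchg case, with illustrative names only (this is a model of the rule, not the pass code):

#include <cstdint>
#include <cstdio>

struct ShadowedWord {
  uint64_t Value;
  uint64_t Shadow; // 0 = fully initialized
};

static uint64_t model_cmpxchg(ShadowedWord &Mem, const ShadowedWord &Cmp,
                              const ShadowedWord &New) {
  // The compare operand is checked: an uninitialized compare value silently
  // changes control-dependent behavior.
  if (Cmp.Shadow != 0)
    std::fprintf(stderr, "use-of-uninitialized-value in cmpxchg compare\n");
  // New.Shadow is intentionally neither checked nor propagated: checking it
  // would produce false positives.
  uint64_t Old = Mem.Value;
  if (Mem.Value == Cmp.Value)
    Mem.Value = New.Value;
  Mem.Shadow = 0; // the target memory's shadow becomes clean
  return Old;     // the result is treated as fully initialized as well
}

int main() {
  ShadowedWord M{1, 0}, Cmp{1, 0}, New{2, 0xff};
  model_cmpxchg(M, Cmp, New);
  std::printf("mem=%llu shadow=%llu\n", (unsigned long long)M.Value,
              (unsigned long long)M.Shadow);
  return 0;
}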
// Vector manipulation.
void visitExtractElementInst(ExtractElementInst &I) {
- insertCheck(I.getOperand(1), &I);
+ insertShadowCheck(I.getOperand(1), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
"_msprop"));
@@ -902,7 +1101,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitInsertElementInst(InsertElementInst &I) {
- insertCheck(I.getOperand(2), &I);
+ insertShadowCheck(I.getOperand(2), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
I.getOperand(2), "_msprop"));
@@ -910,7 +1109,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitShuffleVectorInst(ShuffleVectorInst &I) {
- insertCheck(I.getOperand(2), &I);
+ insertShadowCheck(I.getOperand(2), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
I.getOperand(2), "_msprop"));
@@ -1109,18 +1308,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Cast between two shadow types, extending or truncating as
/// necessary.
- Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) {
+ Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
+ bool Signed = false) {
Type *srcTy = V->getType();
if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
- return IRB.CreateIntCast(V, dstTy, false);
+ return IRB.CreateIntCast(V, dstTy, Signed);
if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
- return IRB.CreateIntCast(V, dstTy, false);
+ return IRB.CreateIntCast(V, dstTy, Signed);
size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
Value *V2 =
- IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), false);
+ IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
return IRB.CreateBitCast(V2, dstTy);
// TODO: handle struct types.
}
@@ -1145,7 +1345,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleDiv(Instruction &I) {
IRBuilder<> IRB(&I);
// Strict on the second argument.
- insertCheck(I.getOperand(1), &I);
+ insertShadowCheck(I.getOperand(1), &I);
setShadow(&I, getShadow(&I, 0));
setOrigin(&I, getOrigin(&I, 0));
}
@@ -1428,7 +1628,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
// FIXME: use ClStoreCleanOrigin
// FIXME: factor out common code from materializeStores
@@ -1455,9 +1655,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setShadow(&I, getCleanShadow(&I));
}
-
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
if (MS.TrackOrigins) {
if (LoadShadow)
@@ -1554,11 +1753,119 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(Op));
}
+ // \brief Instrument vector convert intrinsic.
+ //
+ // This function instruments intrinsics like cvtsi2ss:
+ // %Out = int_xxx_cvtyyy(%ConvertOp)
+ // or
+ // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
+ // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
+ // same number of \p Out elements, and (if it has 2 arguments) copies the
+ // rest of the elements from \p CopyOp.
+ // In most cases the conversion involves a floating-point value, which may
+ // trigger a hardware exception when not fully initialized. For this reason
+ // we require \p ConvertOp[0:NumUsedElements] to be fully initialized and
+ // trap otherwise.
+ // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
+ // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
+ // return a fully initialized value.
+ void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
+ IRBuilder<> IRB(&I);
+ Value *CopyOp, *ConvertOp;
+
+ switch (I.getNumArgOperands()) {
+ case 2:
+ CopyOp = I.getArgOperand(0);
+ ConvertOp = I.getArgOperand(1);
+ break;
+ case 1:
+ ConvertOp = I.getArgOperand(0);
+ CopyOp = NULL;
+ break;
+ default:
+ llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
+ }
+
+ // The first *NumUsedElements* elements of ConvertOp are converted to the
+ // same number of output elements. The rest of the output is copied from
+ // CopyOp, or (if not available) filled with zeroes.
+ // Combine shadow for elements of ConvertOp that are used in this operation,
+ // and insert a check.
+ // FIXME: consider propagating shadow of ConvertOp, at least in the case of
+ // int->any conversion.
+ Value *ConvertShadow = getShadow(ConvertOp);
+ Value *AggShadow = 0;
+ if (ConvertOp->getType()->isVectorTy()) {
+ AggShadow = IRB.CreateExtractElement(
+ ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+ for (int i = 1; i < NumUsedElements; ++i) {
+ Value *MoreShadow = IRB.CreateExtractElement(
+ ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
+ AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
+ }
+ } else {
+ AggShadow = ConvertShadow;
+ }
+ assert(AggShadow->getType()->isIntegerTy());
+ insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
+
+ // Build result shadow by zero-filling parts of CopyOp shadow that come from
+ // ConvertOp.
+ if (CopyOp) {
+ assert(CopyOp->getType() == I.getType());
+ assert(CopyOp->getType()->isVectorTy());
+ Value *ResultShadow = getShadow(CopyOp);
+ Type *EltTy = ResultShadow->getType()->getVectorElementType();
+ for (int i = 0; i < NumUsedElements; ++i) {
+ ResultShadow = IRB.CreateInsertElement(
+ ResultShadow, ConstantInt::getNullValue(EltTy),
+ ConstantInt::get(IRB.getInt32Ty(), i));
+ }
+ setShadow(&I, ResultShadow);
+ setOrigin(&I, getOrigin(CopyOp));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+ }
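Outside of IR, the shadow rule implemented by handleVectorConvertIntrinsic can be modelled on plain arrays: OR together the shadows of the lanes that feed the conversion and require them to be clean, then build the result shadow from CopyOp's shadow with the converted lanes cleared (or all zeros when there is no CopyOp). A small sketch with illustrative names:

#include <cstdint>
#include <cstdio>
#include <vector>

struct VecShadow {
  std::vector<uint32_t> Lanes; // 0 = fully initialized, nonzero = poisoned bits
};

static VecShadow convertShadow(const VecShadow &ConvertOp,
                               const VecShadow *CopyOp, int NumUsedElements) {
  // Combine the shadows of the lanes actually used by the conversion.
  uint32_t Agg = 0;
  for (int i = 0; i < NumUsedElements; ++i)
    Agg |= ConvertOp.Lanes[i];
  if (Agg != 0)
    std::fprintf(stderr, "use-of-uninitialized-value in convert operand\n");

  VecShadow Res;
  if (CopyOp) {
    Res = *CopyOp;
    for (int i = 0; i < NumUsedElements; ++i)
      Res.Lanes[i] = 0; // converted lanes become fully initialized
  } else {
    Res.Lanes.assign(ConvertOp.Lanes.size(), 0); // no CopyOp: clean result
  }
  return Res;
}

int main() {
  VecShadow Conv{{0, 0xffffffffu, 0, 0}}; // lane 1 poisoned, but unused below
  VecShadow Copy{{0xff, 0, 0, 0}};        // lane 0 of CopyOp poisoned
  VecShadow R = convertShadow(Conv, &Copy, /*NumUsedElements=*/1);
  std::printf("result lane 0 shadow: %u\n", R.Lanes[0]); // cleared to 0
  return 0;
}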
+
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case llvm::Intrinsic::bswap:
handleBswap(I);
break;
+ case llvm::Intrinsic::x86_avx512_cvtsd2usi64:
+ case llvm::Intrinsic::x86_avx512_cvtsd2usi:
+ case llvm::Intrinsic::x86_avx512_cvtss2usi64:
+ case llvm::Intrinsic::x86_avx512_cvtss2usi:
+ case llvm::Intrinsic::x86_avx512_cvttss2usi64:
+ case llvm::Intrinsic::x86_avx512_cvttss2usi:
+ case llvm::Intrinsic::x86_avx512_cvttsd2usi64:
+ case llvm::Intrinsic::x86_avx512_cvttsd2usi:
+ case llvm::Intrinsic::x86_avx512_cvtusi2sd:
+ case llvm::Intrinsic::x86_avx512_cvtusi2ss:
+ case llvm::Intrinsic::x86_avx512_cvtusi642sd:
+ case llvm::Intrinsic::x86_avx512_cvtusi642ss:
+ case llvm::Intrinsic::x86_sse2_cvtsd2si64:
+ case llvm::Intrinsic::x86_sse2_cvtsd2si:
+ case llvm::Intrinsic::x86_sse2_cvtsd2ss:
+ case llvm::Intrinsic::x86_sse2_cvtsi2sd:
+ case llvm::Intrinsic::x86_sse2_cvtsi642sd:
+ case llvm::Intrinsic::x86_sse2_cvtss2sd:
+ case llvm::Intrinsic::x86_sse2_cvttsd2si64:
+ case llvm::Intrinsic::x86_sse2_cvttsd2si:
+ case llvm::Intrinsic::x86_sse_cvtsi2ss:
+ case llvm::Intrinsic::x86_sse_cvtsi642ss:
+ case llvm::Intrinsic::x86_sse_cvtss2si64:
+ case llvm::Intrinsic::x86_sse_cvtss2si:
+ case llvm::Intrinsic::x86_sse_cvttss2si64:
+ case llvm::Intrinsic::x86_sse_cvttss2si:
+ handleVectorConvertIntrinsic(I, 1);
+ break;
+ case llvm::Intrinsic::x86_sse2_cvtdq2pd:
+ case llvm::Intrinsic::x86_sse2_cvtps2pd:
+ case llvm::Intrinsic::x86_sse_cvtps2pi:
+ case llvm::Intrinsic::x86_sse_cvttps2pi:
+ handleVectorConvertIntrinsic(I, 2);
+ break;
default:
if (!handleUnknownIntrinsic(I))
visitInstruction(I);
@@ -1604,6 +1911,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
IRBuilder<> IRB(&I);
+
+ if (MS.WrapIndirectCalls && !CS.getCalledFunction())
+ IndirectCallList.push_back(CS);
+
unsigned ArgOffset = 0;
DEBUG(dbgs() << " CallSite: " << I << "\n");
for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
@@ -1647,7 +1958,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " done with call args\n");
FunctionType *FT =
- cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0));
+ cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
if (FT->isVarArg()) {
VAHelper->visitCallSite(CS, IRB);
}
@@ -1686,12 +1997,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitReturnInst(ReturnInst &I) {
IRBuilder<> IRB(&I);
- if (Value *RetVal = I.getReturnValue()) {
- // Set the shadow for the RetVal.
+ Value *RetVal = I.getReturnValue();
+ if (!RetVal) return;
+ Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+ if (CheckReturnValue) {
+ insertShadowCheck(RetVal, &I);
+ Value *Shadow = getCleanShadow(RetVal);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ } else {
Value *Shadow = getShadow(RetVal);
- Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
- DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ // FIXME: make it conditional if ClStoreCleanOrigin==0
if (MS.TrackOrigins)
IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
}
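The net effect of the CheckReturnValue split above can be modelled without IR: a checked function ("main" for now) reports at the return site and always publishes a clean return-value shadow, while other functions forward their shadow to the caller through the thread-local retval slot. The slot name below is only an illustrative stand-in for the runtime's TLS variable:

#include <cstdint>
#include <cstdio>

static thread_local uint64_t retval_shadow_tls = 0; // illustrative TLS slot

static uint64_t return_unchecked(uint64_t Value, uint64_t Shadow) {
  retval_shadow_tls = Shadow; // propagate the shadow to the caller
  return Value;
}

static uint64_t return_checked(uint64_t Value, uint64_t Shadow) {
  if (Shadow != 0)
    std::fprintf(stderr, "use-of-uninitialized-value in return value\n");
  retval_shadow_tls = 0; // the caller always sees a fully initialized result
  return Value;
}

int main() {
  return_unchecked(42, 0xff);
  std::printf("propagated shadow: %llx\n",
              (unsigned long long)retval_shadow_tls);
  return_checked(0, 0xff); // reports, then publishes clean shadow
  std::printf("checked shadow: %llx\n", (unsigned long long)retval_shadow_tls);
  return 0;
}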
@@ -1734,18 +2050,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Descr =
createPrivateNonConstGlobalForString(*F.getParent(),
StackDescription.str());
- IRB.CreateCall3(MS.MsanSetAllocaOriginFn,
+
+ IRB.CreateCall4(MS.MsanSetAllocaOrigin4Fn,
IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
ConstantInt::get(MS.IntptrTy, Size),
- IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()));
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(&F, MS.IntptrTy));
}
}
void visitSelectInst(SelectInst& I) {
IRBuilder<> IRB(&I);
- setShadow(&I, IRB.CreateSelect(I.getCondition(),
- getShadow(I.getTrueValue()), getShadow(I.getFalseValue()),
- "_msprop"));
+ // a = select b, c, d
+ Value *S = IRB.CreateSelect(I.getCondition(), getShadow(I.getTrueValue()),
+ getShadow(I.getFalseValue()));
+ if (I.getType()->isAggregateType()) {
+ // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
+ // an extra "select". This results in much more compact IR.
+ // Sa = select Sb, poisoned, (select b, Sc, Sd)
+ S = IRB.CreateSelect(getShadow(I.getCondition()),
+ getPoisonedShadow(getShadowTy(I.getType())), S,
+ "_msprop_select_agg");
+ } else {
+ // Sa = (sext Sb) | (select b, Sc, Sd)
+ S = IRB.CreateOr(S, CreateShadowCast(IRB, getShadow(I.getCondition()),
+ S->getType(), true),
+ "_msprop_select");
+ }
+ setShadow(&I, S);
if (MS.TrackOrigins) {
// Origins are always i32, so any vector conditions must be flattened.
// FIXME: consider tracking vector origins for app vectors?
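The scalar case of the new select rule, Sa = (sext Sb) | (select b, Sc, Sd), is easy to check with plain integers; the sketch below is a model of the rule rather than the pass code:

#include <cassert>
#include <cstdint>

// b/Sb are the condition and its 1-bit shadow; Sc/Sd are the operand shadows.
static uint32_t selectShadow(bool b, bool Sb, uint32_t Sc, uint32_t Sd) {
  uint32_t CondShadow = Sb ? 0xffffffffu : 0u; // sext i1 -> i32
  return CondShadow | (b ? Sc : Sd);
}

int main() {
  assert(selectShadow(true, false, 0x0f, 0x00) == 0x0f);        // picks Sc
  assert(selectShadow(false, false, 0x0f, 0x00) == 0x00);       // picks Sd
  assert(selectShadow(true, true, 0x00, 0x00) == 0xffffffffu);  // poisoned cond
  return 0;
}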
@@ -1780,7 +2112,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n");
setShadow(&I, ResShadow);
- setOrigin(&I, getCleanOrigin());
+ setOriginForNaryOp(I);
}
void visitInsertValueInst(InsertValueInst &I) {
@@ -1793,7 +2125,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
DEBUG(dbgs() << " Res: " << *Res << "\n");
setShadow(&I, Res);
- setOrigin(&I, getCleanOrigin());
+ setOriginForNaryOp(I);
}
void dumpInst(Instruction &I) {
@@ -1816,7 +2148,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
dumpInst(I);
DEBUG(dbgs() << "DEFAULT: " << I << "\n");
for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
- insertCheck(I.getOperand(i), &I);
+ insertShadowCheck(I.getOperand(i), &I);
setShadow(&I, getCleanShadow(&I));
setOrigin(&I, getCleanOrigin());
}
@@ -1970,8 +2302,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr =
MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
- Value *SrcPtr =
- getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset);
+ Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset);
IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
}
}
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
deleted file mode 100644
index b45aef6..0000000
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments the specified program with counters for edge profiling.
-// Edge profiling can give a reasonable approximation of the hot paths through a
-// program, and is used for a wide variety of program transformations.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-optimal-edge-profiling"
-#include "llvm/Transforms/Instrumentation.h"
-#include "MaximumSpanningTree.h"
-#include "ProfilingUtils.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-using namespace llvm;
-
-STATISTIC(NumEdgesInserted, "The # of edges inserted.");
-
-namespace {
- class OptimalEdgeProfiler : public ModulePass {
- bool runOnModule(Module &M);
- public:
- static char ID; // Pass identification, replacement for typeid
- OptimalEdgeProfiler() : ModulePass(ID) {
- initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(ProfileEstimatorPassID);
- AU.addRequired<ProfileInfo>();
- }
-
- virtual const char *getPassName() const {
- return "Optimal Edge Profiler";
- }
- };
-}
-
-char OptimalEdgeProfiler::ID = 0;
-INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
- "Insert optimal instrumentation for edge profiling",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
- "Insert optimal instrumentation for edge profiling",
- false, false)
-
-ModulePass *llvm::createOptimalEdgeProfilerPass() {
- return new OptimalEdgeProfiler();
-}
-
-inline static void printEdgeCounter(ProfileInfo::Edge e,
- BasicBlock* b,
- unsigned i) {
- DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
- << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n");
-}
-
-bool OptimalEdgeProfiler::runOnModule(Module &M) {
- Function *Main = M.getFunction("main");
- if (Main == 0) {
- errs() << "WARNING: cannot insert edge profiling into a module"
- << " with no main function!\n";
- return false; // No main, no instrumentation!
- }
-
- // NumEdges counts all the edges that may be instrumented. Later on it is
- // decided which edges to actually instrument, to achieve optimal profiling.
- // For the entry block a virtual edge (0,entry) is reserved, for each block
- // with no successors an edge (BB,0) is reserved. These edges are necessary
- // to calculate a truly optimal maximum spanning tree and thus an optimal
- // instrumentation.
- unsigned NumEdges = 0;
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Reserve space for (0,entry) edge.
- ++NumEdges;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Keep track of which blocks need to be instrumented. We don't want to
- // instrument blocks that are added as the result of breaking critical
- // edges!
- if (BB->getTerminator()->getNumSuccessors() == 0) {
- // Reserve space for (BB,0) edge.
- ++NumEdges;
- } else {
- NumEdges += BB->getTerminator()->getNumSuccessors();
- }
- }
- }
-
- // In the profiling output a counter for each edge is reserved, but only a
- // few are used. This is done to be able to read back in the profile without
- // calculating the maximum spanning tree again; instead each edge counter that
- // is not used is initialised with -1 to signal that this edge counter has to
- // be calculated from other edge counters on reading the profile info back
- // in.
-
- Type *Int32 = Type::getInt32Ty(M.getContext());
- ArrayType *ATy = ArrayType::get(Int32, NumEdges);
- GlobalVariable *Counters =
- new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "OptEdgeProfCounters");
- NumEdgesInserted = 0;
-
- std::vector<Constant*> Initializer(NumEdges);
- Constant *Zero = ConstantInt::get(Int32, 0);
- Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
-
- // Instrument all of the edges not in MST...
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
-
- // Calculate a Maximum Spanning Tree with the edge weights determined by
- // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
- // edges (0,entry) and (BB,0) (for blocks with no successors), and these
- // edges also participate in the maximum spanning tree calculation.
- // The third parameter of MaximumSpanningTree() has the effect that not the
- // actual MST is returned but the edges _not_ in the MST.
-
- ProfileInfo::EdgeWeights ECs =
- getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
- std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
- MaximumSpanningTree<BasicBlock> MST(EdgeVector);
- std::stable_sort(MST.begin(), MST.end());
-
- // Check if (0,entry) not in the MST. If not, instrument edge
- // (IncrementCounterInBlock()) and set the counter initially to zero, if
- // the edge is in the MST the counter is initialised to -1.
-
- BasicBlock *entry = &(F->getEntryBlock());
- ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
- printEdgeCounter(edge, entry, i);
- IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
- Initializer[i++] = (Zero);
- } else{
- Initializer[i++] = (Uncounted);
- }
-
- // InsertedBlocks contains all blocks that were inserted for splitting an
- // edge; these blocks do not have to be instrumented.
- DenseSet<BasicBlock*> InsertedBlocks;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Check if block was not inserted and thus does not have to be
- // instrumented.
- if (InsertedBlocks.count(BB)) continue;
-
- // Okay, we have to add a counter of each outgoing edge not in MST. If
- // the outgoing edge is not critical don't split it, just insert the
- // counter in the source or destination of the edge. Also, if the block
- // has no successors, the virtual edge (BB,0) is processed.
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0) {
- ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
- printEdgeCounter(edge, BB, i);
- IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
- Initializer[i++] = (Zero);
- } else{
- Initializer[i++] = (Uncounted);
- }
- }
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- BasicBlock *Succ = TI->getSuccessor(s);
- ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
-
- // If the edge is critical, split it.
- bool wasInserted = SplitCriticalEdge(TI, s, this);
- Succ = TI->getSuccessor(s);
- if (wasInserted)
- InsertedBlocks.insert(Succ);
-
- // Okay, we are guaranteed that the edge is no longer critical. If
- // we only have a single successor, insert the counter in this block,
- // otherwise insert it in the successor block.
- if (TI->getNumSuccessors() == 1) {
- // Insert counter at the start of the block
- printEdgeCounter(edge, BB, i);
- IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
- } else {
- // Insert counter at the start of the block
- printEdgeCounter(edge, Succ, i);
- IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
- }
- Initializer[i++] = (Zero);
- } else {
- Initializer[i++] = (Uncounted);
- }
- }
- }
- }
-
- // Check if the number of edges counted at first was the number of edges we
- // considered for instrumentation.
- assert(i == NumEdges && "the number of edges in counting array is wrong");
-
- // Assign the now completely defined initialiser to the array.
- Constant *init = ConstantArray::get(ATy, Initializer);
- Counters->setInitializer(init);
-
- // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
- return true;
-}
-
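For reference, the central idea the deleted pass relied on, instrumenting only the edges outside a maximum-weight spanning tree and reconstructing the rest from flow conservation, can be sketched standalone. The sketch uses Kruskal's algorithm with a union-find over a toy graph; the weights merely stand in for ProfileEstimator's estimates and are not taken from LLVM:

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

struct Edge { int U, V; long W; };

struct UnionFind {
  std::vector<int> Parent;
  explicit UnionFind(int N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0);
  }
  int find(int X) {
    if (Parent[X] != X) Parent[X] = find(Parent[X]);
    return Parent[X];
  }
  bool unite(int A, int B) {
    A = find(A); B = find(B);
    if (A == B) return false;
    Parent[A] = B;
    return true;
  }
};

int main() {
  // Toy CFG: 0 = entry, 3 = exit; heavier edges are the likelier ones.
  std::vector<Edge> Edges = {
      {0, 1, 100}, {1, 2, 90}, {1, 3, 10}, {2, 3, 90}, {2, 1, 5}};
  std::vector<Edge> Sorted = Edges;
  std::sort(Sorted.begin(), Sorted.end(),
            [](const Edge &A, const Edge &B) { return A.W > B.W; });

  UnionFind UF(4);
  std::vector<Edge> Chords; // edges that would get a counter
  for (const Edge &E : Sorted)
    if (!UF.unite(E.U, E.V)) // already connected: not in the spanning tree
      Chords.push_back(E);

  for (const Edge &E : Chords)
    std::printf("instrument edge %d->%d (weight %ld)\n", E.U, E.V, E.W);
  return 0;
}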
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
deleted file mode 100644
index 7de7326..0000000
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ /dev/null
@@ -1,1424 +0,0 @@
-//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments functions for Ball-Larus path profiling. Ball-Larus
-// profiling converts the CFG into a DAG by replacing backedges with edges
-// from entry to the start block and from the end block to exit. The paths
- // along the new DAG are enumerated, i.e. each path is given a path number.
-// Edges are instrumented to increment the path number register, such that the
-// path number register will equal the path number of the path taken at the
-// exit.
-//
-// This file defines classes for building a CFG for use with different stages
-// in the Ball-Larus path profiling instrumentation [Ball96]. The
-// requirements are formatting the llvm CFG into the Ball-Larus DAG, path
-// numbering, finding a spanning tree, moving increments from the spanning
-// tree to chords.
-//
-// Terms:
-// DAG - Directed Acyclic Graph.
-// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
-// removed in the following manner. For every backedge
-// v->w, insert edge ENTRY->w and edge v->EXIT.
-// Path Number - The number corresponding to a specific path through a
-// Ball-Larus DAG.
-// Spanning Tree - A subgraph, S, is a spanning tree if S covers all
-// vertices and is a tree.
-// Chord - An edge not in the spanning tree.
-//
-// [Ball96]
-// T. Ball and J. R. Larus. "Efficient Path Profiling."
-// International Symposium on Microarchitecture, pages 46-57, 1996.
-// http://portal.acm.org/citation.cfm?id=243857
-//
-// [Ball94]
-// Thomas Ball. "Efficiently Counting Program Events with Support for
-// On-line queries."
- // ACM Transactions on Programming Languages and Systems, Vol 16, No 5,
-// September 1994, Pages 1399-1410.
-//===----------------------------------------------------------------------===//
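For reference, the path-numbering step described in the header above can be sketched standalone: NumPaths(exit) = 1, NumPaths(v) is the sum over v's successors, and the increment on v's i-th outgoing edge is the number of exit paths through the earlier successors, so the increments along any entry-to-exit path sum to a unique number in [0, NumPaths(entry)). The toy DAG below is hard-coded and processed in reverse topological order; none of this reuses the deleted pass's data structures:

#include <cstdio>
#include <vector>

int main() {
  // Node 0 = entry, node 3 = exit; adjacency lists of successors.
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {1, 3}, {}};
  const int N = static_cast<int>(Succ.size());

  std::vector<long> NumPaths(N, 0);
  NumPaths[3] = 1; // exit node
  // Reverse topological order for this DAG: 3 (exit), then 1, 2, 0.
  for (int V : {1, 2, 0})
    for (int S : Succ[V])
      NumPaths[V] += NumPaths[S];

  // Edge increments: Val(v -> succ_i) = sum of NumPaths(succ_j) for j < i.
  for (int V = 0; V < N; ++V) {
    long Prefix = 0;
    for (int S : Succ[V]) {
      std::printf("edge %d->%d increment %ld\n", V, S, Prefix);
      Prefix += NumPaths[S];
    }
  }
  std::printf("total paths from entry: %ld\n", NumPaths[0]);
  return 0;
}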
-#define DEBUG_TYPE "insert-path-profiling"
-
-#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
-#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeBuilder.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <vector>
-
-#define HASH_THRESHHOLD 100000
-
-using namespace llvm;
-
-namespace {
-class BLInstrumentationNode;
-class BLInstrumentationEdge;
-class BLInstrumentationDag;
-
-// ---------------------------------------------------------------------------
- // BLInstrumentationNode extends BallLarusNode with members used by the
- // instrumentation algorithms.
-// ---------------------------------------------------------------------------
-class BLInstrumentationNode : public BallLarusNode {
-public:
- // Creates a new BLInstrumentationNode from a BasicBlock.
- BLInstrumentationNode(BasicBlock* BB);
-
- // Get/sets the Value corresponding to the pathNumber register,
- // constant or phinode. Used by the instrumentation code to remember
- // path number Values.
- Value* getStartingPathNumber();
- void setStartingPathNumber(Value* pathNumber);
-
- Value* getEndingPathNumber();
- void setEndingPathNumber(Value* pathNumber);
-
- // Get/set the PHINode Instruction for this node.
- PHINode* getPathPHI();
- void setPathPHI(PHINode* pathPHI);
-
-private:
-
- Value* _startingPathNumber; // The Value for the current pathNumber.
- Value* _endingPathNumber; // The Value for the current pathNumber.
- PHINode* _pathPHI; // The PHINode for current pathNumber.
-};
-
-// --------------------------------------------------------------------------
-// BLInstrumentationEdge extends BallLarusEdge with data about the
-// instrumentation that will end up on each edge.
-// --------------------------------------------------------------------------
-class BLInstrumentationEdge : public BallLarusEdge {
-public:
- BLInstrumentationEdge(BLInstrumentationNode* source,
- BLInstrumentationNode* target);
-
- // Sets the target node of this edge. Required to split edges.
- void setTarget(BallLarusNode* node);
-
- // Get/set whether edge is in the spanning tree.
- bool isInSpanningTree() const;
- void setIsInSpanningTree(bool isInSpanningTree);
-
- // Get/set whether this edge will be instrumented with a path number
- // initialization.
- bool isInitialization() const;
- void setIsInitialization(bool isInitialization);
-
- // Get/set whether this edge will be instrumented with a path counter
- // increment. Notice this is incrementing the path counter
- // corresponding to the path number register. The path number
- // increment is determined by getIncrement().
- bool isCounterIncrement() const;
- void setIsCounterIncrement(bool isCounterIncrement);
-
- // Get/set the path number increment that this edge will be instrumented
- // with. This is distinct from the path counter increment and the
- // weight. The counter increment counts the number of executions of
- // some path, whereas the path number keeps track of which path number
- // the program is on.
- long getIncrement() const;
- void setIncrement(long increment);
-
- // Get/set whether the edge has been instrumented.
- bool hasInstrumentation();
- void setHasInstrumentation(bool hasInstrumentation);
-
- // Returns the successor number of this edge in the source.
- unsigned getSuccessorNumber();
-
-private:
- // The increment that the code will be instrumented with.
- long long _increment;
-
- // Whether this edge is in the spanning tree.
- bool _isInSpanningTree;
-
- // Whether this edge is an initialization of the path number.
- bool _isInitialization;
-
- // Whether this edge is a path counter increment.
- bool _isCounterIncrement;
-
- // Whether this edge has been instrumented.
- bool _hasInstrumentation;
-};
-
-// ---------------------------------------------------------------------------
-// BLInstrumentationDag extends BallLarusDag with algorithms that
-// determine where instrumentation should be placed.
-// ---------------------------------------------------------------------------
-class BLInstrumentationDag : public BallLarusDag {
-public:
- BLInstrumentationDag(Function &F);
-
- // Returns the Exit->Root edge. This edge is required for creating
- // directed cycles in the algorithm for moving instrumentation off of
- // the spanning tree
- BallLarusEdge* getExitRootEdge();
-
- // Returns an array of phony edges which mark those nodes
- // with function calls
- BLEdgeVector getCallPhonyEdges();
-
- // Gets/sets the path counter array
- GlobalVariable* getCounterArray();
- void setCounterArray(GlobalVariable* c);
-
- // Calculates the increments for the chords, thereby removing
- // instrumentation from the spanning tree edges. Implementation is based
- // on the algorithm in Figure 4 of [Ball94]
- void calculateChordIncrements();
-
- // Updates the state when an edge has been split
- void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
-
- // Calculates a spanning tree of the DAG ignoring cycles. Whichever
- // edges are in the spanning tree will not be instrumented, but this
- // implementation does not try to minimize the instrumentation overhead
- // by trying to find hot edges.
- void calculateSpanningTree();
-
- // Pushes initialization further down in order to group the first
- // increment and initialization.
- void pushInitialization();
-
- // Pushes the path counter increments up in order to group the last path
- // number increment.
- void pushCounters();
-
- // Removes phony edges from the successor list of the source, and the
- // predecessor list of the target.
- void unlinkPhony();
-
- // Generate dot graph for the function
- void generateDotGraph();
-
-protected:
- // BLInstrumentationDag creates BLInstrumentationNode objects in this
- // method overriding the creation of BallLarusNode objects.
- //
- // Allows subclasses to determine which type of Node is created.
- // Override this method to produce subclasses of BallLarusNode if
- // necessary.
- virtual BallLarusNode* createNode(BasicBlock* BB);
-
- // BLInstrumentationDag create BLInstrumentationEdges.
- //
- // Allows subclasses to determine which type of Edge is created.
- // Override this method to produce subclasses of BallLarusEdge if
- // necessary. Parameters source and target will have been created by
- // createNode and can be cast to the subclass of BallLarusNode*
- // returned by createNode.
- virtual BallLarusEdge* createEdge(
- BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
-
-private:
- BLEdgeVector _treeEdges; // All edges in the spanning tree.
- BLEdgeVector _chordEdges; // All edges not in the spanning tree.
- GlobalVariable* _counterArray; // Array to store path counters
-
- // Removes the edge from the appropriate predecessor and successor lists.
- void unlinkEdge(BallLarusEdge* edge);
-
- // Makes an edge part of the spanning tree.
- void makeEdgeSpanning(BLInstrumentationEdge* edge);
-
- // Pushes initialization and calls itself recursively.
- void pushInitializationFromEdge(BLInstrumentationEdge* edge);
-
- // Pushes path counter increments up recursively.
- void pushCountersFromEdge(BLInstrumentationEdge* edge);
-
- // Depth first algorithm for determining the chord increments.
- void calculateChordIncrementsDfs(
- long weight, BallLarusNode* v, BallLarusEdge* e);
-
- // Determines the relative direction of two edges.
- int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
-};
-
-// ---------------------------------------------------------------------------
-// PathProfiler is a module pass which instruments path profiling instructions
-// ---------------------------------------------------------------------------
-class PathProfiler : public ModulePass {
-private:
- // Current context for multi threading support.
- LLVMContext* Context;
-
- // Which function are we currently instrumenting
- unsigned currentFunctionNumber;
-
- // The function prototype in the profiling runtime for incrementing a
- // single path counter in a hash table.
- Constant* llvmIncrementHashFunction;
- Constant* llvmDecrementHashFunction;
-
- // Instruments each function with path profiling. 'main' is instrumented
- // with code to save the profile to disk.
- bool runOnModule(Module &M);
-
- // Analyzes the function for Ball-Larus path profiling, and inserts code.
- void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
-
- // Creates an increment constant representing incr.
- ConstantInt* createIncrementConstant(long incr, int bitsize);
-
- // Creates an increment constant representing the value in
- // edge->getIncrement().
- ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
-
- // Finds the insertion point after pathNumber in block. PathNumber may
- // be NULL.
- BasicBlock::iterator getInsertionPoint(
- BasicBlock* block, Value* pathNumber);
-
- // Inserts source's pathNumber Value* into target. Target may or may not
- // have multiple predecessors, and may or may not have its phiNode
- // initialized.
- void pushValueIntoNode(
- BLInstrumentationNode* source, BLInstrumentationNode* target);
-
- // Inserts source's pathNumber Value* into the appropriate slot of
- // target's phiNode.
- void pushValueIntoPHI(
- BLInstrumentationNode* target, BLInstrumentationNode* source);
-
- // The Value* in node, oldVal, is updated with a Value* corresponding to
- // oldVal + addition.
- void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
- bool atBeginning);
-
- // Creates a counter increment in the given node. The Value* in node is
- // taken as the index into a hash table.
- void insertCounterIncrement(
- Value* incValue,
- BasicBlock::iterator insertPoint,
- BLInstrumentationDag* dag,
- bool increment = true);
-
- // A PHINode is created in the node, and its values initialized to -1U.
- void preparePHI(BLInstrumentationNode* node);
-
- // Inserts instrumentation for the given edge
- //
- // Pre: The edge's source node has pathNumber set if edge is non zero
- // path number increment.
- //
- // Post: Edge's target node has a pathNumber set to the path number Value
- // corresponding to the value of the path register after edge's
- // execution.
- void insertInstrumentationStartingAt(
- BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag);
-
- // If this edge is a critical edge, then inserts a node at this edge.
- // This edge becomes the first edge, and a new BallLarusEdge is created.
- bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
-
- // Inserts instrumentation according to the marked edges in dag. Phony
- // edges must be unlinked from the DAG, but accessible from the
- // backedges. Dag must have initializations, path number increments, and
- // counter increments present.
- //
- // Counter storage is created here.
- void insertInstrumentation( BLInstrumentationDag& dag, Module &M);
-
-public:
- static char ID; // Pass identification, replacement for typeid
- PathProfiler() : ModulePass(ID) {
- initializePathProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- virtual const char *getPassName() const {
- return "Path Profiler";
- }
-};
-} // end anonymous namespace
-
-// Should we print the dot-graphs
-static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
- cl::desc("Output the path profiling DAG for each function."));
-
-// Register the path profiler as a pass
-char PathProfiler::ID = 0;
-INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
- "Insert instrumentation for Ball-Larus path profiling",
- false, false)
-
-ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
-
-namespace llvm {
- class PathProfilingFunctionTable {};
-
- // Type for global array storing references to hashes or arrays
- template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
- xcompile> {
- public:
- static StructType *get(LLVMContext& C) {
- return( StructType::get(
- TypeBuilder<types::i<32>, xcompile>::get(C), // type
- TypeBuilder<types::i<32>, xcompile>::get(C), // array size
- TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
- NULL));
- }
- };
-
- typedef TypeBuilder<PathProfilingFunctionTable, true>
- ftEntryTypeBuilder;
-
- // BallLarusEdge << operator overloading
- raw_ostream& operator<<(raw_ostream& os,
- const BLInstrumentationEdge& edge)
- LLVM_ATTRIBUTE_USED;
- raw_ostream& operator<<(raw_ostream& os,
- const BLInstrumentationEdge& edge) {
- os << "[" << edge.getSource()->getName() << " -> "
- << edge.getTarget()->getName() << "] init: "
- << (edge.isInitialization() ? "yes" : "no")
- << " incr:" << edge.getIncrement() << " cinc: "
- << (edge.isCounterIncrement() ? "yes" : "no");
- return(os);
- }
-}
-
-// Creates a new BLInstrumentationNode from a BasicBlock.
-BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
- BallLarusNode(BB),
- _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
-
-// Constructor for BLInstrumentationEdge.
-BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
- BLInstrumentationNode* target)
- : BallLarusEdge(source, target, 0),
- _increment(0), _isInSpanningTree(false), _isInitialization(false),
- _isCounterIncrement(false), _hasInstrumentation(false) {}
-
-// Sets the target node of this edge. Required to split edges.
-void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
- _target = node;
-}
-
-// Returns whether this edge is in the spanning tree.
-bool BLInstrumentationEdge::isInSpanningTree() const {
- return(_isInSpanningTree);
-}
-
-// Sets whether this edge is in the spanning tree.
-void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
- _isInSpanningTree = isInSpanningTree;
-}
-
-// Returns whether this edge will be instrumented with a path number
-// initialization.
-bool BLInstrumentationEdge::isInitialization() const {
- return(_isInitialization);
-}
-
-// Sets whether this edge will be instrumented with a path number
-// initialization.
-void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
- _isInitialization = isInitialization;
-}
-
-// Returns whether this edge will be instrumented with a path counter
-// increment. Notice this is incrementing the path counter
-// corresponding to the path number register. The path number
-// increment is determined by getIncrement().
-bool BLInstrumentationEdge::isCounterIncrement() const {
- return(_isCounterIncrement);
-}
-
-// Sets whether this edge will be instrumented with a path counter
-// increment.
-void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
- _isCounterIncrement = isCounterIncrement;
-}
-
-// Gets the path number increment that this edge will be instrumented
-// with. This is distinct from the path counter increment and the
- // weight. The counter increment counts the number of executions of
-// some path, whereas the path number keeps track of which path number
-// the program is on.
-long BLInstrumentationEdge::getIncrement() const {
- return(_increment);
-}
-
-// Set whether this edge will be instrumented with a path number
-// increment.
-void BLInstrumentationEdge::setIncrement(long increment) {
- _increment = increment;
-}
-
-// True iff the edge has already been instrumented.
-bool BLInstrumentationEdge::hasInstrumentation() {
- return(_hasInstrumentation);
-}
-
-// Set whether this edge has been instrumented.
-void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
- _hasInstrumentation = hasInstrumentation;
-}
-
-// Returns the successor number of this edge in the source.
-unsigned BLInstrumentationEdge::getSuccessorNumber() {
- BallLarusNode* sourceNode = getSource();
- BallLarusNode* targetNode = getTarget();
- BasicBlock* source = sourceNode->getBlock();
- BasicBlock* target = targetNode->getBlock();
-
- if(source == NULL || target == NULL)
- return(0);
-
- TerminatorInst* terminator = source->getTerminator();
-
- unsigned i;
- for(i=0; i < terminator->getNumSuccessors(); i++) {
- if(terminator->getSuccessor(i) == target)
- break;
- }
-
- return(i);
-}
-
-// BLInstrumentationDag constructor initializes a DAG for the given Function.
-BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
- _counterArray(0) {
-}
-
-// Returns the Exit->Root edge. This edge is required for creating
-// directed cycles in the algorithm for moving instrumentation off of
-// the spanning tree
-BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
- BLEdgeIterator erEdge = getExit()->succBegin();
- return(*erEdge);
-}
-
-BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
- BLEdgeVector callEdges;
-
- for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++ ) {
- if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
- callEdges.push_back(*edge);
- }
-
- return callEdges;
-}
-
-// Gets the path counter array
-GlobalVariable* BLInstrumentationDag::getCounterArray() {
- return _counterArray;
-}
-
-void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
- _counterArray = c;
-}
-
-// Calculates the increment for the chords, thereby removing
-// instrumentation from the spanning tree edges. Implementation is based on
-// the algorithm in Figure 4 of [Ball94]
-void BLInstrumentationDag::calculateChordIncrements() {
- calculateChordIncrementsDfs(0, getRoot(), NULL);
-
- BLInstrumentationEdge* chord;
- for(BLEdgeIterator chordEdge = _chordEdges.begin(),
- end = _chordEdges.end(); chordEdge != end; chordEdge++) {
- chord = (BLInstrumentationEdge*) *chordEdge;
- chord->setIncrement(chord->getIncrement() + chord->getWeight());
- }
-}
-
-// Updates the state when an edge has been split
-void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
- BasicBlock* newBlock) {
- BallLarusNode* oldTarget = formerEdge->getTarget();
- BallLarusNode* newNode = addNode(newBlock);
- formerEdge->setTarget(newNode);
- newNode->addPredEdge(formerEdge);
-
- DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n");
-
- oldTarget->removePredEdge(formerEdge);
- BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
-
- if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
- formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
- newEdge->setType(formerEdge->getType());
- newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
- newEdge->setPhonyExit(formerEdge->getPhonyExit());
- formerEdge->setType(BallLarusEdge::NORMAL);
- formerEdge->setPhonyRoot(NULL);
- formerEdge->setPhonyExit(NULL);
- }
-}
-
-// Calculates a spanning tree of the DAG ignoring cycles. Whichever
-// edges are in the spanning tree will not be instrumented, but this
-// implementation does not try to minimize the instrumentation overhead
-// by trying to find hot edges.
-void BLInstrumentationDag::calculateSpanningTree() {
- std::stack<BallLarusNode*> dfsStack;
-
- for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
- nodeIt != end; nodeIt++) {
- (*nodeIt)->setColor(BallLarusNode::WHITE);
- }
-
- dfsStack.push(getRoot());
- while(dfsStack.size() > 0) {
- BallLarusNode* node = dfsStack.top();
- dfsStack.pop();
-
- if(node->getColor() == BallLarusNode::WHITE)
- continue;
-
- BallLarusNode* nextNode;
- bool forward = true;
- BLEdgeIterator succEnd = node->succEnd();
-
- node->setColor(BallLarusNode::WHITE);
- // first iterate over successors then predecessors
- for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
- edge != predEnd; edge++) {
- if(edge == succEnd) {
- edge = node->predBegin();
- forward = false;
- }
-
- // Ignore split edges
- if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
- if(nextNode->getColor() != BallLarusNode::WHITE) {
- nextNode->setColor(BallLarusNode::WHITE);
- makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
- }
- }
- }
-
- for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++) {
- BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
- // safe since createEdge is overridden
- if(!instEdge->isInSpanningTree() && (*edge)->getType()
- != BallLarusEdge::SPLITEDGE)
- _chordEdges.push_back(instEdge);
- }
-}
-
-// Pushes initialization further down in order to group the first
-// increment and initialization.
-void BLInstrumentationDag::pushInitialization() {
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) getExitRootEdge();
- exitRootEdge->setIsInitialization(true);
- pushInitializationFromEdge(exitRootEdge);
-}
-
-// Pushes the path counter increments up in order to group the last path
-// number increment.
-void BLInstrumentationDag::pushCounters() {
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) getExitRootEdge();
- exitRootEdge->setIsCounterIncrement(true);
- pushCountersFromEdge(exitRootEdge);
-}
-
-// Removes phony edges from the successor list of the source, and the
-// predecessor list of the target.
-void BLInstrumentationDag::unlinkPhony() {
- BallLarusEdge* edge;
-
- for(BLEdgeIterator next = _edges.begin(),
- end = _edges.end(); next != end; next++) {
- edge = (*next);
-
- if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
- edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
- edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
- unlinkEdge(edge);
- }
- }
-}
-
-// Generate a .dot graph to represent the DAG and pathNumbers
-void BLInstrumentationDag::generateDotGraph() {
- std::string errorInfo;
- std::string functionName = getFunction().getName().str();
- std::string filename = "pathdag." + functionName + ".dot";
-
- DEBUG (dbgs() << "Writing '" << filename << "'...\n");
- raw_fd_ostream dotFile(filename.c_str(), errorInfo);
-
- if (!errorInfo.empty()) {
- errs() << "Error opening '" << filename.c_str() <<"' for writing!";
- errs() << "\n";
- return;
- }
-
- dotFile << "digraph " << functionName << " {\n";
-
- for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++) {
- std::string sourceName = (*edge)->getSource()->getName();
- std::string targetName = (*edge)->getTarget()->getName();
-
- dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
- << targetName.c_str() << "\" ";
-
- long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
-
- switch( (*edge)->getType() ) {
- case BallLarusEdge::NORMAL:
- dotFile << "[label=" << inc << "] [color=black];\n";
- break;
-
- case BallLarusEdge::BACKEDGE:
- dotFile << "[color=cyan];\n";
- break;
-
- case BallLarusEdge::BACKEDGE_PHONY:
- dotFile << "[label=" << inc
- << "] [color=blue];\n";
- break;
-
- case BallLarusEdge::SPLITEDGE:
- dotFile << "[color=violet];\n";
- break;
-
- case BallLarusEdge::SPLITEDGE_PHONY:
- dotFile << "[label=" << inc << "] [color=red];\n";
- break;
-
- case BallLarusEdge::CALLEDGE_PHONY:
- dotFile << "[label=" << inc << "] [color=green];\n";
- break;
- }
- }
-
- dotFile << "}\n";
-}
-
-// Allows subclasses to determine which type of Node is created.
-// Override this method to produce subclasses of BallLarusNode if
-// necessary. The destructor of BallLarusDag will call free on each pointer
-// created.
-BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
- return( new BLInstrumentationNode(BB) );
-}
-
-// Allows subclasses to determine which type of Edge is created.
-// Override this method to produce subclasses of BallLarusEdge if
-// necessary. The destructor of BallLarusDag will call free on each pointer
-// created.
-BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
- BallLarusNode* target, unsigned edgeNumber) {
-  // One can cast from BallLarusNode to BLInstrumentationNode since createNode
-  // is overridden to produce BLInstrumentationNode.
- return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
- (BLInstrumentationNode*)target) );
-}
-
-// Gets the Value corresponding to the pathNumber register, constant,
-// or phinode. Used by the instrumentation code to remember path
-// number Values.
-Value* BLInstrumentationNode::getStartingPathNumber(){
- return(_startingPathNumber);
-}
-
-// Sets the Value of the pathNumber. Used by the instrumentation code.
-void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
- DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
- pathNumber->getName() :
- "unused") << "\n");
- _startingPathNumber = pathNumber;
-}
-
-Value* BLInstrumentationNode::getEndingPathNumber(){
- return(_endingPathNumber);
-}
-
-void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
- DEBUG(dbgs() << " EPN-" << getName() << " <-- "
- << (pathNumber ? pathNumber->getName() : "unused") << "\n");
- _endingPathNumber = pathNumber;
-}
-
-// Get the PHINode Instruction for this node. Used by instrumentation
-// code.
-PHINode* BLInstrumentationNode::getPathPHI() {
- return(_pathPHI);
-}
-
-// Set the PHINode Instruction for this node. Used by instrumentation
-// code.
-void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
- _pathPHI = pathPHI;
-}
-
-// Removes the edge from the appropriate predecessor and successor
-// lists.
-void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
- if(edge == getExitRootEdge())
- DEBUG(dbgs() << " Removing exit->root edge\n");
-
- edge->getSource()->removeSuccEdge(edge);
- edge->getTarget()->removePredEdge(edge);
-}
-
-// Makes an edge part of the spanning tree.
-void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
- edge->setIsInSpanningTree(true);
- _treeEdges.push_back(edge);
-}
-
-// Pushes initialization and calls itself recursively.
-void BLInstrumentationDag::pushInitializationFromEdge(
- BLInstrumentationEdge* edge) {
- BallLarusNode* target;
-
- target = edge->getTarget();
- if( target->getNumberPredEdges() > 1 || target == getExit() ) {
- return;
- } else {
- for(BLEdgeIterator next = target->succBegin(),
- end = target->succEnd(); next != end; next++) {
- BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
-
- // Skip split edges
- if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- intoEdge->setIncrement(intoEdge->getIncrement() +
- edge->getIncrement());
- intoEdge->setIsInitialization(true);
- pushInitializationFromEdge(intoEdge);
- }
-
- edge->setIncrement(0);
- edge->setIsInitialization(false);
- }
-}
-
-// Pushes path counter increments up recursively.
-void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
- BallLarusNode* source;
-
- source = edge->getSource();
- if(source->getNumberSuccEdges() > 1 || source == getRoot()
- || edge->isInitialization()) {
- return;
- } else {
- for(BLEdgeIterator previous = source->predBegin(),
- end = source->predEnd(); previous != end; previous++) {
- BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
-
- // Skip split edges
- if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- fromEdge->setIncrement(fromEdge->getIncrement() +
- edge->getIncrement());
- fromEdge->setIsCounterIncrement(true);
- pushCountersFromEdge(fromEdge);
- }
-
- edge->setIncrement(0);
- edge->setIsCounterIncrement(false);
- }
-}
-
-// Depth first algorithm for determining the chord increments.
-void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
- BallLarusNode* v, BallLarusEdge* e) {
- BLInstrumentationEdge* f;
-
- for(BLEdgeIterator treeEdge = _treeEdges.begin(),
- end = _treeEdges.end(); treeEdge != end; treeEdge++) {
- f = (BLInstrumentationEdge*) *treeEdge;
- if(e != f && v == f->getTarget()) {
- calculateChordIncrementsDfs(
- calculateChordIncrementsDir(e,f)*(weight) +
- f->getWeight(), f->getSource(), f);
- }
- if(e != f && v == f->getSource()) {
- calculateChordIncrementsDfs(
- calculateChordIncrementsDir(e,f)*(weight) +
- f->getWeight(), f->getTarget(), f);
- }
- }
-
- for(BLEdgeIterator chordEdge = _chordEdges.begin(),
- end = _chordEdges.end(); chordEdge != end; chordEdge++) {
- f = (BLInstrumentationEdge*) *chordEdge;
- if(v == f->getSource() || v == f->getTarget()) {
- f->setIncrement(f->getIncrement() +
- calculateChordIncrementsDir(e,f)*weight);
- }
- }
-}
-
-// Determines the relative direction of two edges.
-int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
- BallLarusEdge* f) {
- if( e == NULL)
- return(1);
- else if(e->getSource() == f->getTarget()
- || e->getTarget() == f->getSource())
- return(1);
-
- return(-1);
-}
-
-// Creates an increment constant representing incr.
-ConstantInt* PathProfiler::createIncrementConstant(long incr,
- int bitsize) {
- return(ConstantInt::get(IntegerType::get(*Context, 32), incr));
-}
-
-// Creates an increment constant representing the value in
-// edge->getIncrement().
-ConstantInt* PathProfiler::createIncrementConstant(
- BLInstrumentationEdge* edge) {
- return(createIncrementConstant(edge->getIncrement(), 32));
-}
-
-// Finds the insertion point after pathNumber in block. PathNumber may
-// be NULL.
-BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
- pathNumber) {
- if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
- || (((Instruction*)(pathNumber))->getParent()) != block) {
- return(block->getFirstInsertionPt());
- } else {
- Instruction* pathNumberInst = (Instruction*) (pathNumber);
- BasicBlock::iterator insertPoint;
- BasicBlock::iterator end = block->end();
-
- for(insertPoint = block->begin();
- insertPoint != end; insertPoint++) {
- Instruction* insertInst = &(*insertPoint);
-
- if(insertInst == pathNumberInst)
- return(++insertPoint);
- }
-
- return(insertPoint);
- }
-}
-
-// A PHINode is created in the node, and its values initialized to -1U.
-void PathProfiler::preparePHI(BLInstrumentationNode* node) {
- BasicBlock* block = node->getBlock();
- BasicBlock::iterator insertPoint = block->getFirstInsertionPt();
- pred_iterator PB = pred_begin(node->getBlock()),
- PE = pred_end(node->getBlock());
- PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
- std::distance(PB, PE), "pathNumber",
- insertPoint );
- node->setPathPHI(phi);
- node->setStartingPathNumber(phi);
- node->setEndingPathNumber(phi);
-
- for(pred_iterator predIt = PB; predIt != PE; predIt++) {
- BasicBlock* pred = (*predIt);
-
- if(pred != NULL)
- phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
- }
-}
-
-// Inserts source's pathNumber Value* into target. Target may or may not
-// have multiple predecessors, and may or may not have its phiNode
-// initialized.
-void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
- BLInstrumentationNode* target) {
- if(target->getBlock() == NULL)
- return;
-
-
- if(target->getNumberPredEdges() <= 1) {
- assert(target->getStartingPathNumber() == NULL &&
- "Target already has path number");
- target->setStartingPathNumber(source->getEndingPathNumber());
- target->setEndingPathNumber(source->getEndingPathNumber());
- DEBUG(dbgs() << " Passing path number"
- << (source->getEndingPathNumber() ? "" : " (null)")
- << " value through.\n");
- } else {
- if(target->getPathPHI() == NULL) {
- DEBUG(dbgs() << " Initializing PHI node for block '"
- << target->getName() << "'\n");
- preparePHI(target);
- }
- pushValueIntoPHI(target, source);
- DEBUG(dbgs() << " Passing number value into PHI for block '"
- << target->getName() << "'\n");
- }
-}
-
-// Inserts source's pathNumber Value* into the appropriate slot of
-// target's phiNode.
-void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
- BLInstrumentationNode* source) {
- PHINode* phi = target->getPathPHI();
- assert(phi != NULL && " Tried to push value into node with PHI, but node"
- " actually had no PHI.");
- phi->removeIncomingValue(source->getBlock(), false);
- phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
-}
-
-// The Value* in node, oldVal, is updated with a Value* corresponding to
-// oldVal + addition.
-void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
- Value* addition, bool atBeginning) {
- BasicBlock* block = node->getBlock();
- assert(node->getStartingPathNumber() != NULL);
- assert(node->getEndingPathNumber() != NULL);
-
- BasicBlock::iterator insertPoint;
-
- if( atBeginning )
- insertPoint = block->getFirstInsertionPt();
- else
- insertPoint = block->getTerminator();
-
- DEBUG(errs() << " Creating addition instruction.\n");
- Value* newpn = BinaryOperator::Create(Instruction::Add,
- node->getStartingPathNumber(),
- addition, "pathNumber", insertPoint);
-
- node->setEndingPathNumber(newpn);
-
- if( atBeginning )
- node->setStartingPathNumber(newpn);
-}
-
-// Creates a counter increment in the given node. The Value* in node is
-// taken as the index into an array or hash table. The hash table access
-// is a call to the runtime.
-void PathProfiler::insertCounterIncrement(Value* incValue,
- BasicBlock::iterator insertPoint,
- BLInstrumentationDag* dag,
- bool increment) {
- // Counter increment for array
- if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
- // Get pointer to the array location
- std::vector<Value*> gepIndices(2);
- gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
- gepIndices[1] = incValue;
-
- GetElementPtrInst* pcPointer =
- GetElementPtrInst::Create(dag->getCounterArray(), gepIndices,
- "counterInc", insertPoint);
-
- // Load from the array - call it oldPC
- LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
-
- // Test to see whether adding 1 will overflow the counter
- ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
- createIncrementConstant(0xffffffff, 32),
- "isMax");
-
- // Select increment for the path counter based on overflow
- SelectInst* inc =
- SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
- createIncrementConstant(0,32),
- "pathInc", insertPoint);
-
- // newPc = oldPc + inc
- BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
- oldPc, inc, "newPC",
- insertPoint);
-
- // Store back in to the array
- new StoreInst(newPc, pcPointer, insertPoint);
- } else { // Counter increment for hash
- std::vector<Value*> args(2);
- args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
- currentFunctionNumber);
- args[1] = incValue;
-
- CallInst::Create(
- increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
- args, "", insertPoint);
- }
-}
-
-// Inserts instrumentation for the given edge
-//
-// Pre: The edge's source node has its pathNumber set if the edge has a
-// non-zero path number increment.
-//
-// Post: Edge's target node has a pathNumber set to the path number Value
-// corresponding to the value of the path register after edge's
-// execution.
-//
-// FIXME: This should be reworked so it's not recursive.
-void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag) {
- // Mark the edge as instrumented
- edge->setHasInstrumentation(true);
- DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
-
- // create a new node for this edge's instrumentation
- splitCritical(edge, dag);
-
- BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
- BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
- BLInstrumentationNode* instrumentNode;
- BLInstrumentationNode* nextSourceNode;
-
- bool atBeginning = false;
-
- // Source node has only 1 successor so any information can be simply
- // inserted in to it without splitting
- if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
- DEBUG(dbgs() << " Potential instructions to be placed in: "
- << sourceNode->getName() << " (at end)\n");
- instrumentNode = sourceNode;
- nextSourceNode = targetNode; // ... since we never made any new nodes
- }
-
- // The target node only has one predecessor, so we can safely insert edge
- // instrumentation into it. If there was splitting, it must have been
- // successful.
- else if( targetNode->getNumberPredEdges() == 1 ) {
- DEBUG(dbgs() << " Potential instructions to be placed in: "
- << targetNode->getName() << " (at beginning)\n");
- pushValueIntoNode(sourceNode, targetNode);
- instrumentNode = targetNode;
- nextSourceNode = NULL; // ... otherwise we'll just keep splitting
- atBeginning = true;
- }
-
- // Somehow, splitting must have failed.
- else {
- errs() << "Instrumenting could not split a critical edge.\n";
- DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n");
- return;
- }
-
- // Insert instrumentation if this is a back or split edge
- if( edge->getType() == BallLarusEdge::BACKEDGE ||
- edge->getType() == BallLarusEdge::SPLITEDGE ) {
- BLInstrumentationEdge* top =
- (BLInstrumentationEdge*) edge->getPhonyRoot();
- BLInstrumentationEdge* bottom =
- (BLInstrumentationEdge*) edge->getPhonyExit();
-
- assert( top->isInitialization() && " Top phony edge did not"
- " contain a path number initialization.");
- assert( bottom->isCounterIncrement() && " Bottom phony edge"
- " did not contain a path counter increment.");
-
- // split edge has yet to be initialized
- if( !instrumentNode->getEndingPathNumber() ) {
- instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
- instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
- }
-
- BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstInsertionPt() :
- instrumentNode->getBlock()->getTerminator();
-
- // add information from the bottom edge, if it exists
- if( bottom->getIncrement() ) {
- Value* newpn =
- BinaryOperator::Create(Instruction::Add,
- instrumentNode->getStartingPathNumber(),
- createIncrementConstant(bottom),
- "pathNumber", insertPoint);
- instrumentNode->setEndingPathNumber(newpn);
- }
-
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- insertPoint, dag);
-
- if( atBeginning )
- instrumentNode->setStartingPathNumber(createIncrementConstant(top));
-
- instrumentNode->setEndingPathNumber(createIncrementConstant(top));
-
- // Check for path counter increments
- if( top->isCounterIncrement() ) {
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- instrumentNode->getBlock()->getTerminator(),dag);
- instrumentNode->setEndingPathNumber(0);
- }
- }
-
- // Insert instrumentation if this is a normal edge
- else {
- BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstInsertionPt() :
- instrumentNode->getBlock()->getTerminator();
-
- if( edge->isInitialization() ) { // initialize path number
- instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
- } else if( edge->getIncrement() ) {// increment path number
- Value* newpn =
- BinaryOperator::Create(Instruction::Add,
- instrumentNode->getStartingPathNumber(),
- createIncrementConstant(edge),
- "pathNumber", insertPoint);
- instrumentNode->setEndingPathNumber(newpn);
-
- if( atBeginning )
- instrumentNode->setStartingPathNumber(newpn);
- }
-
- // Check for path counter increments
- if( edge->isCounterIncrement() ) {
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- insertPoint, dag);
- instrumentNode->setEndingPathNumber(0);
- }
- }
-
- // Push it along
- if (nextSourceNode && instrumentNode->getEndingPathNumber())
- pushValueIntoNode(instrumentNode, nextSourceNode);
-
- // Add all the successors
- for( BLEdgeIterator next = targetNode->succBegin(),
- end = targetNode->succEnd(); next != end; next++ ) {
- // So long as it is un-instrumented, add it to the list
- if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
- insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
- else
- DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next)
- << " already instrumented.\n");
- }
-}
-
-// Inserts instrumentation according to the marked edges in dag. Phony edges
-// must be unlinked from the DAG, but accessible from the backedges. Dag
-// must have initializations, path number increments, and counter increments
-// present.
-//
-// Counter storage is created here.
-void PathProfiler::insertInstrumentation(
- BLInstrumentationDag& dag, Module &M) {
-
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) dag.getExitRootEdge();
- insertInstrumentationStartingAt(exitRootEdge, &dag);
-
- // Iterate through each call edge and apply the appropriate hash increment
- // and decrement functions
- BLEdgeVector callEdges = dag.getCallPhonyEdges();
- for( BLEdgeIterator edge = callEdges.begin(),
- end = callEdges.end(); edge != end; edge++ ) {
- BLInstrumentationNode* node =
- (BLInstrumentationNode*)(*edge)->getSource();
- BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt();
-
- // Find the first function call
- while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
- insertPoint++;
-
- DEBUG(dbgs() << "\nInstrumenting method call block '"
- << node->getBlock()->getName() << "'\n");
- DEBUG(dbgs() << " Path number initialized: "
- << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
-
- Value* newpn;
- if( node->getStartingPathNumber() ) {
- long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
- if ( inc )
- newpn = BinaryOperator::Create(Instruction::Add,
- node->getStartingPathNumber(),
- createIncrementConstant(inc,32),
- "pathNumber", insertPoint);
- else
- newpn = node->getStartingPathNumber();
- } else {
- newpn = (Value*)createIncrementConstant(
- ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
- }
-
- insertCounterIncrement(newpn, insertPoint, &dag);
- insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
- &dag, false);
- }
-}
-
-// Instruments a single function; called once per function from runOnModule.
-void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
- Function &F, Module &M) {
- // Build DAG from CFG
- BLInstrumentationDag dag = BLInstrumentationDag(F);
- dag.init();
-
- // give each path a unique integer value
- dag.calculatePathNumbers();
-
- // modify path increments to increase the efficiency
- // of instrumentation
- dag.calculateSpanningTree();
- dag.calculateChordIncrements();
- dag.pushInitialization();
- dag.pushCounters();
- dag.unlinkPhony();
-
- // potentially generate .dot graph for the dag
- if (DotPathDag)
- dag.generateDotGraph ();
-
- // Should we store the information in an array or hash
- if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
- Type* t = ArrayType::get(Type::getInt32Ty(*Context),
- dag.getNumberOfPaths());
-
- dag.setCounterArray(new GlobalVariable(M, t, false,
- GlobalValue::InternalLinkage,
- Constant::getNullValue(t), ""));
- }
-
- insertInstrumentation(dag, M);
-
- // Add to global function reference table
- unsigned type;
- Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
-
- if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
- type = ProfilingArray;
- else
- type = ProfilingHash;
-
- std::vector<Constant*> entryArray(3);
- entryArray[0] = createIncrementConstant(type,32);
- entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
- entryArray[2] = dag.getCounterArray() ?
- ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
- Constant::getNullValue(voidPtr);
-
- StructType* at = ftEntryTypeBuilder::get(*Context);
- ConstantStruct* functionEntry =
- (ConstantStruct*)ConstantStruct::get(at, entryArray);
- ftInit.push_back(functionEntry);
-}
-
-// Output the bitcode if we want to observe instrumentation changes
-#define PRINT_MODULE dbgs() << \
- "\n\n============= MODULE BEGIN ===============\n" << M << \
- "\n============== MODULE END ================\n"
-
-bool PathProfiler::runOnModule(Module &M) {
- Context = &M.getContext();
-
- DEBUG(dbgs()
- << "****************************************\n"
- << "****************************************\n"
- << "** **\n"
- << "** PATH PROFILING INSTRUMENTATION **\n"
- << "** **\n"
- << "****************************************\n"
- << "****************************************\n");
-
- // No main, no instrumentation!
- Function *Main = M.getFunction("main");
-
-  // Using Fortran? ... this kind of works
- if (!Main)
- Main = M.getFunction("MAIN__");
-
- if (!Main) {
- errs() << "WARNING: cannot insert path profiling into a module"
- << " with no main function!\n";
- return false;
- }
-
- llvmIncrementHashFunction = M.getOrInsertFunction(
- "llvm_increment_path_count",
- Type::getVoidTy(*Context), // return type
- Type::getInt32Ty(*Context), // function number
- Type::getInt32Ty(*Context), // path number
- NULL );
-
- llvmDecrementHashFunction = M.getOrInsertFunction(
- "llvm_decrement_path_count",
- Type::getVoidTy(*Context), // return type
- Type::getInt32Ty(*Context), // function number
- Type::getInt32Ty(*Context), // path number
- NULL );
-
- std::vector<Constant*> ftInit;
- unsigned functionNumber = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
- if (F->isDeclaration())
- continue;
-
- DEBUG(dbgs() << "Function: " << F->getName() << "\n");
- functionNumber++;
-
- // set function number
- currentFunctionNumber = functionNumber;
- runOnFunction(ftInit, *F, M);
- }
-
- Type *t = ftEntryTypeBuilder::get(*Context);
- ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
- Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
-
- DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
-
- GlobalVariable* functionTable =
- new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
- ftInitConstant, "functionPathTable");
- Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
- InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
- PointerType::getUnqual(eltType));
-
- DEBUG(PRINT_MODULE);
-
- return true;
-}
-
-// If this edge is a critical edge, insert a node on this edge.
-// This edge becomes the first edge, and a new BallLarusEdge is created.
-// Returns true if the edge was split.
-bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag) {
- unsigned succNum = edge->getSuccessorNumber();
- BallLarusNode* sourceNode = edge->getSource();
- BallLarusNode* targetNode = edge->getTarget();
- BasicBlock* sourceBlock = sourceNode->getBlock();
- BasicBlock* targetBlock = targetNode->getBlock();
-
- if(sourceBlock == NULL || targetBlock == NULL
- || sourceNode->getNumberSuccEdges() <= 1
- || targetNode->getNumberPredEdges() == 1 ) {
- return(false);
- }
-
- TerminatorInst* terminator = sourceBlock->getTerminator();
-
- if( SplitCriticalEdge(terminator, succNum, this, false)) {
- BasicBlock* newBlock = terminator->getSuccessor(succNum);
- dag->splitUpdate(edge, newBlock);
- return(true);
- } else
- return(false);
-}
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
deleted file mode 100644
index 4b3de6d..0000000
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a few helper functions which are used by profile
-// instrumentation code to instrument the code. This allows the profiler pass
-// to worry about *what* to insert, and these functions take care of *how* to do
-// it.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ProfilingUtils.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-
-void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Array,
- PointerType *arrayType) {
- LLVMContext &Context = MainFn->getContext();
- Type *ArgVTy =
- PointerType::getUnqual(Type::getInt8PtrTy(Context));
- PointerType *UIntPtr = arrayType ? arrayType :
- Type::getInt32PtrTy(Context);
- Module &M = *MainFn->getParent();
- Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
- Type::getInt32Ty(Context),
- ArgVTy, UIntPtr,
- Type::getInt32Ty(Context),
- (Type *)0);
-
- // This could force argc and argv into programs that wouldn't otherwise have
- // them, but instead we just pass null values in.
- std::vector<Value*> Args(4);
- Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Args[1] = Constant::getNullValue(ArgVTy);
-
- // Skip over any allocas in the entry block.
- BasicBlock *Entry = MainFn->begin();
- BasicBlock::iterator InsertPos = Entry->begin();
- while (isa<AllocaInst>(InsertPos)) ++InsertPos;
-
- std::vector<Constant*> GEPIndices(2,
- Constant::getNullValue(Type::getInt32Ty(Context)));
- unsigned NumElements = 0;
- if (Array) {
- Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices);
- NumElements =
- cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
- } else {
- // If this profiling instrumentation doesn't have a constant array, just
- // pass null.
- Args[2] = ConstantPointerNull::get(UIntPtr);
- }
- Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
-
- CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
-
- // If argc or argv are not available in main, just pass null values in.
- Function::arg_iterator AI;
- switch (MainFn->arg_size()) {
- default:
- case 2:
- AI = MainFn->arg_begin(); ++AI;
- if (AI->getType() != ArgVTy) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
- false);
- InitCall->setArgOperand(1,
- CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
- } else {
- InitCall->setArgOperand(1, AI);
- }
- /* FALL THROUGH */
-
- case 1:
- AI = MainFn->arg_begin();
- // If the program looked at argc, have it look at the return value of the
- // init call instead.
- if (!AI->getType()->isIntegerTy(32)) {
- Instruction::CastOps opcode;
- if (!AI->use_empty()) {
- opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
- AI->replaceAllUsesWith(
- CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
- }
- opcode = CastInst::getCastOpcode(AI, true,
- Type::getInt32Ty(Context), true);
- InitCall->setArgOperand(0,
- CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
- "argc.cast", InitCall));
- } else {
- AI->replaceAllUsesWith(InitCall);
- InitCall->setArgOperand(0, AI);
- }
-
- case 0: break;
- }
-}
-
-void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray, bool beginning) {
- // Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() :
- BB->getTerminator();
- while (isa<AllocaInst>(InsertPos))
- ++InsertPos;
-
- LLVMContext &Context = BB->getContext();
-
- // Create the getelementptr constant expression
- std::vector<Constant*> Indices(2);
- Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
- Constant *ElementPtr =
- ConstantExpr::getGetElementPtr(CounterArray, Indices);
-
- // Load, increment and store the value back.
- Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
- Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- "NewFuncCounter", InsertPos);
- new StoreInst(NewVal, ElementPtr, InsertPos);
-}
-
-void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
- // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
- // types.
- Type *GlobalDtorElems[2] = {
- Type::getInt32Ty(Mod->getContext()),
- FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
- };
- StructType *GlobalDtorElemTy =
- StructType::get(Mod->getContext(), GlobalDtorElems, false);
-
- // Construct the new element we'll be adding.
- Constant *Elem[2] = {
- ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535),
- ConstantExpr::getBitCast(Callee, GlobalDtorElems[1])
- };
-
- // If llvm.global_dtors exists, make a copy of the things in its list and
- // delete it, to replace it with one that has a larger array type.
- std::vector<Constant *> dtors;
- if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) {
- if (ConstantArray *InitList =
- dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) {
- for (unsigned i = 0, e = InitList->getType()->getNumElements();
- i != e; ++i)
- dtors.push_back(cast<Constant>(InitList->getOperand(i)));
- }
- GlobalDtors->eraseFromParent();
- }
-
- // Build up llvm.global_dtors with our new item in it.
- GlobalVariable *GlobalDtors = new GlobalVariable(
- *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
- GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
-
- dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
- GlobalDtors->setInitializer(ConstantArray::get(
- cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
-}
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h
deleted file mode 100644
index 09b2217..0000000
--- a/lib/Transforms/Instrumentation/ProfilingUtils.h
+++ /dev/null
@@ -1,36 +0,0 @@
-//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a few helper functions which are used by profile
-// instrumentation code to instrument the code. This allows the profiler pass
-// to worry about *what* to insert, and these functions take care of *how* to do
-// it.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PROFILINGUTILS_H
-#define PROFILINGUTILS_H
-
-namespace llvm {
- class BasicBlock;
- class Function;
- class GlobalValue;
- class Module;
- class PointerType;
-
- void InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Arr = 0,
- PointerType *arrayType = 0);
- void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray,
- bool beginning = true);
- void InsertProfilingShutdownCall(Function *Callee, Module *Mod);
-}
-
-#endif
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index cc971a3..89fb746 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -227,7 +227,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new SpecialCaseList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
@@ -240,12 +240,8 @@ bool ThreadSanitizer::doInitialization(Module &M) {
}
static bool isVtableAccess(Instruction *I) {
- if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) {
- if (Tag->getNumOperands() < 1) return false;
- if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
- if (Tag1->getString() == "vtable pointer") return true;
- }
- }
+ if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa))
+ return Tag->isTBAAVtableAccess();
return false;
}
@@ -362,7 +358,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
// (e.g. variables that do not escape, etc).
// Instrument memory accesses.
- if (ClInstrumentMemoryAccesses)
+ if (ClInstrumentMemoryAccesses && F.hasFnAttribute(Attribute::SanitizeThread))
for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 6f94a7c..2976df6 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -176,91 +176,6 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
return 0;
}
-/// \brief Test whether the given retainable object pointer escapes.
-///
-/// This differs from regular escape analysis in that a use as an
-/// argument to a call is not considered an escape.
-///
-static bool DoesRetainableObjPtrEscape(const User *Ptr) {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n");
-
- // Walk the def-use chains.
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(Ptr);
- // If Ptr has any operands add them as well.
- for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E;
- ++I) {
- Worklist.push_back(*I);
- }
-
- // Ensure we do not visit any value twice.
- SmallPtrSet<const Value *, 8> VisitedSet;
-
- do {
- const Value *V = Worklist.pop_back_val();
-
- DEBUG(dbgs() << "Visiting: " << *V << "\n");
-
- for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- const User *UUser = *UI;
-
- DEBUG(dbgs() << "User: " << *UUser << "\n");
-
- // Special - Use by a call (callee or argument) is not considered
- // to be an escape.
- switch (GetBasicInstructionClass(UUser)) {
- case IC_StoreWeak:
- case IC_InitWeak:
- case IC_StoreStrong:
- case IC_Autorelease:
- case IC_AutoreleaseRV: {
- DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n");
- // These special functions make copies of their pointer arguments.
- return true;
- }
- case IC_IntrinsicUser:
- // Use by the use intrinsic is not an escape.
- continue;
- case IC_User:
- case IC_None:
- // Use by an instruction which copies the value is an escape if the
- // result is an escape.
- if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
- isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
-
- if (VisitedSet.insert(UUser)) {
- DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes."
- " Adding to list.\n");
- Worklist.push_back(UUser);
- } else {
- DEBUG(dbgs() << "Already visited node.\n");
- }
- continue;
- }
- // Use by a load is not an escape.
- if (isa<LoadInst>(UUser))
- continue;
- // Use by a store is not an escape if the use is the address.
- if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
- if (V != SI->getValueOperand())
- continue;
- break;
- default:
- // Regular calls and other stuff are not considered escapes.
- continue;
- }
- // Otherwise, conservatively assume an escape.
- DEBUG(dbgs() << "Assuming ptr escapes.\n");
- return true;
- }
- } while (!Worklist.empty());
-
- // No escapes found.
- DEBUG(dbgs() << "Ptr does not escape.\n");
- return false;
-}
-
/// This is a wrapper around getUnderlyingObjCPtr along the lines of
/// GetUnderlyingObjects except that it returns early when it sees the first
/// alloca.
@@ -517,7 +432,7 @@ namespace {
bool Partial;
/// The current position in the sequence.
- Sequence Seq : 8;
+ unsigned char Seq : 8;
/// Unidirectional information about the current sequence.
RRInfo RRI;
@@ -583,7 +498,7 @@ namespace {
}
Sequence GetSeq() const {
- return Seq;
+ return static_cast<Sequence>(Seq);
}
void ClearSequenceProgress() {
@@ -623,7 +538,8 @@ namespace {
void
PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ Seq = MergeSeqs(static_cast<Sequence>(Seq), static_cast<Sequence>(Other.Seq),
+ TopDown);
KnownPositiveRefCount &= Other.KnownPositiveRefCount;
// If we're not in a sequence (anymore), drop all associated state.
@@ -674,7 +590,9 @@ namespace {
SmallVector<BasicBlock *, 2> Succs;
public:
- BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+ static const unsigned OverflowOccurredValue;
+
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) { }
typedef MapTy::iterator ptr_iterator;
typedef MapTy::const_iterator ptr_const_iterator;
@@ -745,27 +663,31 @@ namespace {
   /// Returns true if overflow occurred. Returns false if overflow did not
/// occur.
bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
- assert(TopDownPathCount != 0);
- assert(BottomUpPathCount != 0);
+ if (TopDownPathCount == OverflowOccurredValue ||
+ BottomUpPathCount == OverflowOccurredValue)
+ return true;
unsigned long long Product =
(unsigned long long)TopDownPathCount*BottomUpPathCount;
- PathCount = Product;
- // Overflow occured if any of the upper bits of Product are set.
- return Product >> 32;
+    // Overflow occurred if any of the upper bits of Product are set or if all
+    // the lower bits of Product are set.
+ return (Product >> 32) ||
+ ((PathCount = Product) == OverflowOccurredValue);
}
// Specialized CFG utilities.
typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
- edge_iterator pred_begin() { return Preds.begin(); }
- edge_iterator pred_end() { return Preds.end(); }
- edge_iterator succ_begin() { return Succs.begin(); }
- edge_iterator succ_end() { return Succs.end(); }
+ edge_iterator pred_begin() const { return Preds.begin(); }
+ edge_iterator pred_end() const { return Preds.end(); }
+ edge_iterator succ_begin() const { return Succs.begin(); }
+ edge_iterator succ_end() const { return Succs.end(); }
void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
bool isExit() const { return Succs.empty(); }
};
+
+ const unsigned BBState::OverflowOccurredValue = 0xffffffff;
}
void BBState::InitFromPred(const BBState &Other) {
@@ -781,13 +703,25 @@ void BBState::InitFromSucc(const BBState &Other) {
/// The top-down traversal uses this to merge information about predecessors to
/// form the initial state for a new block.
void BBState::MergePred(const BBState &Other) {
+ if (TopDownPathCount == OverflowOccurredValue)
+ return;
+
// Other.TopDownPathCount can be 0, in which case it is either dead or a
// loop backedge. Loop backedges are special.
TopDownPathCount += Other.TopDownPathCount;
+  // In order to be consistent, we clear the top-down pointers when adding
+  // Other.TopDownPathCount causes TopDownPathCount to become
+  // OverflowOccurredValue, even though "true" overflow has not occurred.
+ if (TopDownPathCount == OverflowOccurredValue) {
+ clearTopDownPointers();
+ return;
+ }
+
// Check for overflow. If we have overflow, fall back to conservative
// behavior.
if (TopDownPathCount < Other.TopDownPathCount) {
+ TopDownPathCount = OverflowOccurredValue;
clearTopDownPointers();
return;
}
@@ -813,13 +747,25 @@ void BBState::MergePred(const BBState &Other) {
/// The bottom-up traversal uses this to merge information about successors to
/// form the initial state for a new block.
void BBState::MergeSucc(const BBState &Other) {
+ if (BottomUpPathCount == OverflowOccurredValue)
+ return;
+
// Other.BottomUpPathCount can be 0, in which case it is either dead or a
// loop backedge. Loop backedges are special.
BottomUpPathCount += Other.BottomUpPathCount;
+  // In order to be consistent, we clear the bottom-up pointers when adding
+  // Other.BottomUpPathCount causes BottomUpPathCount to become
+  // OverflowOccurredValue, even though "true" overflow has not occurred.
+ if (BottomUpPathCount == OverflowOccurredValue) {
+ clearBottomUpPointers();
+ return;
+ }
+
// Check for overflow. If we have overflow, fall back to conservative
// behavior.
if (BottomUpPathCount < Other.BottomUpPathCount) {
+ BottomUpPathCount = OverflowOccurredValue;
clearBottomUpPointers();
return;
}
@@ -1158,13 +1104,9 @@ namespace {
unsigned ARCAnnotationProvenanceSourceMDKind;
 #endif // ARC_ANNOTATIONS
- bool IsRetainBlockOptimizable(const Instruction *Inst);
-
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
InstructionClass &Class);
- bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock,
- InstructionClass &Class);
void OptimizeIndividualCalls(Function &F);
void CheckForCFGHazards(const BasicBlock *BB,
@@ -1253,22 +1195,6 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
-bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
- // Without the magic metadata tag, we have to assume this might be an
- // objc_retainBlock call inserted to convert a block pointer to an id,
- // in which case it really is needed.
- if (!Inst->getMetadata(CopyOnEscapeMDKind))
- return false;
-
- // If the pointer "escapes" (not including being used in a call),
- // the copy may be needed.
- if (DoesRetainableObjPtrEscape(Inst))
- return false;
-
- // Otherwise, it's not needed.
- return true;
-}
-
/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
/// not a return value. Or, if it can be paired with an
/// objc_autoreleaseReturnValue, delete the pair and return true.
@@ -1369,41 +1295,6 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
}
-// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain
-// calls.
-//
-// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and
-// does not escape (following the rules of block escaping), strength reduce the
-// objc_retainBlock to an objc_retain.
-//
-// TODO: If an objc_retainBlock call is dominated by a previous
-// objc_retainBlock call, strength reduce the objc_retainBlock to an
-// objc_retain.
-bool
-ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
- InstructionClass &Class) {
- assert(GetBasicInstructionClass(Inst) == Class);
- assert(IC_RetainBlock == Class);
-
- // If we can not optimize Inst, return false.
- if (!IsRetainBlockOptimizable(Inst))
- return false;
-
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n");
- DEBUG(dbgs() << "Old: " << *Inst << "\n");
- CallInst *RetainBlock = cast<CallInst>(Inst);
- Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
- RetainBlock->setCalledFunction(NewDecl);
- // Remove copy_on_escape metadata.
- RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
- Class = IC_Retain;
- DEBUG(dbgs() << "New: " << *Inst << "\n");
- return true;
-}
-
/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
@@ -1480,11 +1371,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
- case IC_RetainBlock:
- // If we strength reduce an objc_retainBlock to an objc_retain, continue
- // onto the objc_retain peephole optimizations. Otherwise break.
- OptimizeRetainBlockCall(F, Inst, Class);
- break;
case IC_RetainRV:
if (OptimizeRetainRVCall(F, Inst))
continue;
@@ -2520,15 +2406,26 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (Jt == Releases.end())
return false;
const RRInfo &NewRetainReleaseRRI = Jt->second;
- assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+
+ // If the release does not have a reference to the retain as well,
+ // something happened which is unaccounted for. Do not do anything.
+ //
+ // This can happen if we catch an additive overflow during path count
+ // merging.
+ if (!NewRetainReleaseRRI.Calls.count(NewRetain))
+ return false;
+
if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
// If we overflow when we compute the path count, don't remove/move
// anything.
const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
- unsigned PathCount;
+ unsigned PathCount = BBState::OverflowOccurredValue;
if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
OldDelta -= PathCount;
// Merge the ReleaseMetadata and IsTailCallRelease values.
@@ -2558,8 +2455,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
// If we overflow when we compute the path count, don't
// remove/move anything.
const BBState &RIPBBState = BBStates[RIP->getParent()];
+ PathCount = BBState::OverflowOccurredValue;
if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
NewDelta -= PathCount;
}
}
@@ -2589,15 +2490,25 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (Jt == Retains.end())
return false;
const RRInfo &NewReleaseRetainRRI = Jt->second;
- assert(NewReleaseRetainRRI.Calls.count(NewRelease));
- if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ // If the retain does not have a reference to the release as well,
+ // something happened which is unaccounted for. Do not do anything.
+ //
+ // This can happen if we catch an additive overflow during path count
+ // merging.
+ if (!NewReleaseRetainRRI.Calls.count(NewRelease))
+ return false;
+
+ if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
// If we overflow when we compute the path count, don't remove/move
// anything.
const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
- unsigned PathCount;
+ unsigned PathCount = BBState::OverflowOccurredValue;
if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
OldDelta += PathCount;
OldCount += PathCount;
@@ -2612,8 +2523,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
// If we overflow when we compute the path count, don't
// remove/move anything.
const BBState &RIPBBState = BBStates[RIP->getParent()];
+
+ PathCount = BBState::OverflowOccurredValue;
if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
NewDelta += PathCount;
NewCount += PathCount;
}
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
deleted file mode 100644
index e755008..0000000
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a very simple profile guided basic block placement
-// algorithm. The idea is to put frequently executed blocks together at the
-// start of the function, and hopefully increase the number of fall-through
-// conditional branches. If there is no profile information for a particular
-// function, this pass basically orders blocks in depth-first order
-//
-// The algorithm implemented here is basically "Algo1" from "Profile Guided Code
-// Positioning" by Pettis and Hansen, except that it uses basic block counts
-// instead of edge counts. This should be improved in many ways, but is very
-// simple for now.
-//
-// Basically we "place" the entry block, then loop over all successors in a DFO,
-// placing the most frequently executed successor until we run out of blocks. I
-// told you this was _extremely_ simplistic. :) This is also much slower than it
-// could be. When it becomes important, this pass will be rewritten to use a
-// better algorithm, and then we can worry about efficiency.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "block-placement"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumMoved, "Number of basic blocks moved");
-
-namespace {
- struct BlockPlacement : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(ID) {
- initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<ProfileInfo>();
- //AU.addPreserved<ProfileInfo>(); // Does this work?
- }
- private:
- /// PI - The profile information that is guiding us.
- ///
- ProfileInfo *PI;
-
- /// NumMovedBlocks - Every time we move a block, increment this counter.
- ///
- unsigned NumMovedBlocks;
-
- /// PlacedBlocks - Every time we place a block, remember it so we don't get
- /// into infinite loops.
- std::set<BasicBlock*> PlacedBlocks;
-
- /// InsertPos - This an iterator to the next place we want to insert a
- /// block.
- Function::iterator InsertPos;
-
- /// PlaceBlocks - Recursively place the specified blocks and any unplaced
- /// successors.
- void PlaceBlocks(BasicBlock *BB);
- };
-}
-
-char BlockPlacement::ID = 0;
-INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false)
-
-FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
-
-bool BlockPlacement::runOnFunction(Function &F) {
- PI = &getAnalysis<ProfileInfo>();
-
- NumMovedBlocks = 0;
- InsertPos = F.begin();
-
- // Recursively place all blocks.
- PlaceBlocks(F.begin());
-
- PlacedBlocks.clear();
- NumMoved += NumMovedBlocks;
- return NumMovedBlocks != 0;
-}
-
-
-/// PlaceBlocks - Recursively place the specified blocks and any unplaced
-/// successors.
-void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
- assert(!PlacedBlocks.count(BB) && "Already placed this block!");
- PlacedBlocks.insert(BB);
-
- // Place the specified block.
- if (&*InsertPos != BB) {
- // Use splice to move the block into the right place. This avoids having to
-    // remove the block from the function then re-add it, which causes a bunch of
- // symbol table traffic that is entirely pointless.
- Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
- Blocks.splice(InsertPos, Blocks, BB);
-
- ++NumMovedBlocks;
- } else {
- // This block is already in the right place, we don't have to do anything.
- ++InsertPos;
- }
-
- // Keep placing successors until we run out of ones to place. Note that this
- // loop is very inefficient (N^2) for blocks with many successors, like switch
- // statements. FIXME!
- while (1) {
- // Okay, now place any unplaced successors.
- succ_iterator SI = succ_begin(BB), E = succ_end(BB);
-
- // Scan for the first unplaced successor.
- for (; SI != E && PlacedBlocks.count(*SI); ++SI)
- /*empty*/;
- if (SI == E) return; // No more successors to place.
-
- double MaxExecutionCount = PI->getExecutionCount(*SI);
- BasicBlock *MaxSuccessor = *SI;
-
- // Scan for more frequently executed successors
- for (; SI != E; ++SI)
- if (!PlacedBlocks.count(*SI)) {
- double Count = PI->getExecutionCount(*SI);
- if (Count > MaxExecutionCount ||
- // Prefer to not disturb the code.
- (Count == MaxExecutionCount && *SI == &*InsertPos)) {
- MaxExecutionCount = Count;
- MaxSuccessor = *SI;
- }
- }
-
- // Now that we picked the maximally executed successor, place it.
- PlaceBlocks(MaxSuccessor);
- }
-}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index f5d1db1..626c810 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,6 +1,5 @@
add_llvm_library(LLVMScalarOpts
ADCE.cpp
- BasicBlockPlacement.cpp
CodeGenPrepare.cpp
ConstantProp.cpp
CorrelatedValuePropagation.cpp
@@ -17,12 +16,15 @@ add_llvm_library(LLVMScalarOpts
LoopInstSimplify.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
+ LoopRerollPass.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
LowerAtomic.cpp
MemCpyOptimizer.cpp
+ PartiallyInlineLibCalls.cpp
Reassociate.cpp
Reg2Mem.cpp
+ SampleProfile.cpp
SCCP.cpp
SROA.cpp
Scalar.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 44804a2..007e9b7 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -80,7 +79,6 @@ namespace {
const TargetLowering *TLI;
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
- ProfileInfo *PFI;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
@@ -111,7 +109,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<ProfileInfo>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -151,7 +148,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (TM) TLI = TM->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
- PFI = getAnalysisIfAvailable<ProfileInfo>();
OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
@@ -442,10 +438,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DT->changeImmediateDominator(DestBB, NewIDom);
DT->eraseNode(BB);
}
- if (PFI) {
- PFI->replaceAllUses(BB, DestBB);
- PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
- }
BB->eraseFromParent();
++NumBlocksElim;
@@ -840,10 +832,12 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
}
};
+#ifndef NDEBUG
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
AM.print(OS);
return OS;
}
+#endif
void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
@@ -1035,7 +1029,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::IntToPtr:
// This inttoptr is a no-op if the integer type is pointer sized.
if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy())
+ TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::BitCast:
@@ -1418,8 +1412,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
Value *Address = User->getOperand(OpNo);
if (!Address->getType()->isPointerTy())
return false;
- Type *AddressAccessTy =
- cast<PointerType>(Address->getType())->getElementType();
+ Type *AddressAccessTy = Address->getType()->getPointerElementType();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
@@ -1573,9 +1566,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
- Type *IntPtrTy =
- TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
-
+ Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *Result = 0;
// Start with the base register. Do this first so that subsequent address
@@ -1894,7 +1885,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P)) {
+ if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+ TLInfo, DT)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 3c08634..5266894 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -72,11 +72,6 @@ namespace {
}
namespace llvm {
-// SimpleValue is POD.
-template<> struct isPodLike<SimpleValue> {
- static const bool value = true;
-};
-
template<> struct DenseMapInfo<SimpleValue> {
static inline SimpleValue getEmptyKey() {
return DenseMapInfo<Instruction*>::getEmptyKey();
@@ -220,11 +215,6 @@ namespace {
}
namespace llvm {
- // CallValue is POD.
- template<> struct isPodLike<CallValue> {
- static const bool value = true;
- };
-
template<> struct DenseMapInfo<CallValue> {
static inline CallValue getEmptyKey() {
return DenseMapInfo<Instruction*>::getEmptyKey();
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index bc418af..6af269d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
@@ -507,7 +508,9 @@ namespace {
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
+ MemIntrin, // A memory intrinsic which is loaded from.
+    UndefVal     // An UndefValue representing a value from a dead block (which
+ // is not yet physically removed from the CFG).
};
/// V - The value that is live out of the block.
@@ -545,10 +548,20 @@ namespace {
Res.Offset = Offset;
return Res;
}
-
+
+ static AvailableValueInBlock getUndef(BasicBlock *BB) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(0);
+ Res.Val.setInt(UndefVal);
+ Res.Offset = 0;
+ return Res;
+ }
+
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+ bool isUndefValue() const { return Val.getInt() == UndefVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
@@ -576,6 +589,7 @@ namespace {
DominatorTree *DT;
const DataLayout *TD;
const TargetLibraryInfo *TLI;
+ SetVector<BasicBlock *> DeadBlocks;
ValueTable VN;
@@ -698,6 +712,9 @@ namespace {
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
+ bool processFoldableCondBr(BranchInst *BI);
+ void addDeadBlock(BasicBlock *BB);
+ void assignValNumForDeadCode();
};
char GVN::ID = 0;
@@ -1071,14 +1088,15 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
- llvm::Type::getInt8PtrTy(Src->getContext()));
+ Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
return -1;
@@ -1155,7 +1173,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *DestPTy =
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
DestPTy = PointerType::get(DestPTy,
- cast<PointerType>(PtrVal->getType())->getAddressSpace());
+ PtrVal->getType()->getPointerAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
@@ -1230,15 +1248,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
- llvm::Type::getInt8PtrTy(Src->getContext()));
+ Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
@@ -1253,8 +1272,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
- LI->getParent()))
+ LI->getParent())) {
+ assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominates this block");
return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+ }
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
@@ -1324,7 +1345,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *getCoercedLoadValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
- } else {
+ } else if (isMemIntrinValue()) {
const DataLayout *TD = gvn.getDataLayout();
assert(TD && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
@@ -1332,6 +1353,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
+ } else {
+ assert(isUndefValue() && "Should be UndefVal");
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
+ return UndefValue::get(LoadTy);
}
return Res;
}
@@ -1355,6 +1380,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
+ if (DeadBlocks.count(DepBB)) {
+      // A dead dependent mem-op disguises itself as a load evaluating the same
+      // value as the load in question.
+ ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
+ continue;
+ }
+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
UnavailableBlocks.push_back(DepBB);
continue;
@@ -2191,11 +2223,13 @@ bool GVN::processInstruction(Instruction *I) {
// For conditional branches, we can perform simple conditional propagation on
// the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ if (!BI->isConditional())
return false;
- Value *BranchCond = BI->getCondition();
+ if (isa<Constant>(BI->getCondition()))
+ return processFoldableCondBr(BI);
+ Value *BranchCond = BI->getCondition();
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
// Avoid multiple edges early.
@@ -2312,6 +2346,9 @@ bool GVN::runOnFunction(Function& F) {
}
if (EnablePRE) {
+ // Fabricate val-num for dead-code in order to suppress assertion in
+ // performPRE().
+ assignValNumForDeadCode();
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -2325,6 +2362,9 @@ bool GVN::runOnFunction(Function& F) {
// Actually, when this happens, we should just fully integrate PRE into GVN.
cleanupGlobalSets();
+  // Do not clean up DeadBlocks in cleanupGlobalSets() as it's called for each
+ // iteration.
+ DeadBlocks.clear();
return Changed;
}
@@ -2335,6 +2375,9 @@ bool GVN::processBlock(BasicBlock *BB) {
// (and incrementing BI before processing an instruction).
assert(InstrsToErase.empty() &&
"We expect InstrsToErase to be empty across iterations");
+ if (DeadBlocks.count(BB))
+ return false;
+
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
@@ -2628,3 +2671,133 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
}
}
}
+
+// BB is declared dead, which implies that other blocks become dead as well.
+// This function adds all such blocks to "DeadBlocks". For the dead blocks'
+// live successors, it updates their phi nodes by replacing the operands
+// corresponding to dead blocks with UndefVal.
+//
+void GVN::addDeadBlock(BasicBlock *BB) {
+ SmallVector<BasicBlock *, 4> NewDead;
+ SmallSetVector<BasicBlock *, 4> DF;
+
+ NewDead.push_back(BB);
+ while (!NewDead.empty()) {
+ BasicBlock *D = NewDead.pop_back_val();
+ if (DeadBlocks.count(D))
+ continue;
+
+ // All blocks dominated by D are dead.
+ SmallVector<BasicBlock *, 8> Dom;
+ DT->getDescendants(D, Dom);
+ DeadBlocks.insert(Dom.begin(), Dom.end());
+
+ // Figure out the dominance-frontier(D).
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
+ E = Dom.end(); I != E; I++) {
+ BasicBlock *B = *I;
+ for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
+ BasicBlock *S = *SI;
+ if (DeadBlocks.count(S))
+ continue;
+
+ bool AllPredDead = true;
+ for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
+ if (!DeadBlocks.count(*PI)) {
+ AllPredDead = false;
+ break;
+ }
+
+ if (!AllPredDead) {
+ // S could be proved dead later on. That is why we don't update phi
+ // operands at this moment.
+ DF.insert(S);
+ } else {
+        // Although S is not dominated by D, it is dead by now. This can
+        // happen if S already had a dead predecessor before D was declared
+        // dead.
+ NewDead.push_back(S);
+ }
+ }
+ }
+ }
+
+ // For the dead blocks' live successors, update their phi nodes by replacing
+ // the operands corresponding to dead blocks with UndefVal.
+ for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
+ I != E; I++) {
+ BasicBlock *B = *I;
+ if (DeadBlocks.count(B))
+ continue;
+
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
+ for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; PI++) {
+ BasicBlock *P = *PI;
+
+ if (!DeadBlocks.count(P))
+ continue;
+
+ if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
+ if (BasicBlock *S = splitCriticalEdges(P, B))
+ DeadBlocks.insert(P = S);
+ }
+
+ for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
+ PHINode &Phi = cast<PHINode>(*II);
+ Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
+ UndefValue::get(Phi.getType()));
+ }
+ }
+ }
+}
+
+// If the given branch is recognized as a foldable branch (i.e. a conditional
+// branch with a constant condition), it performs the following analyses and
+// transformation.
+//  1) If the dead outgoing edge is a critical edge, split it. Let
+//     R be the target of the dead outgoing edge.
+//  2) Identify the set of dead blocks implied by the branch's dead outgoing
+//     edge. The result of this step will be {X | X is dominated by R}.
+//  3) Identify those blocks which have at least one dead predecessor. The
+//     result of this step will be dominance-frontier(R).
+//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
+//     dead blocks with "UndefVal" in the hope that these PHIs will be
+//     optimized away.
+//
+// Return true iff *NEW* dead code is found.
+bool GVN::processFoldableCondBr(BranchInst *BI) {
+ if (!BI || BI->isUnconditional())
+ return false;
+
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ if (!Cond)
+ return false;
+
+ BasicBlock *DeadRoot = Cond->getZExtValue() ?
+ BI->getSuccessor(1) : BI->getSuccessor(0);
+ if (DeadBlocks.count(DeadRoot))
+ return false;
+
+ if (!DeadRoot->getSinglePredecessor())
+ DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
+
+ addDeadBlock(DeadRoot);
+ return true;
+}
+
+// performPRE() will trigger an assertion if it comes across an instruction
+// without an associated val-num. As there are normally far more live
+// instructions than dead instructions, it makes more sense just to "fabricate"
+// a val-number for the dead code than to check whether each instruction
+// involved is dead or not.
+void GVN::assignValNumForDeadCode() {
+ for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
+ E = DeadBlocks.end(); I != E; I++) {
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
+ II != EE; II++) {
+ Instruction *Inst = &*II;
+ unsigned ValNum = VN.lookup_or_add(Inst);
+ addToLeaderTable(ValNum, Inst, BB);
+ }
+ }
+}
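As a reading aid for GVN::addDeadBlock() above, here is a minimal standalone sketch of the same worklist idea on a made-up CFG: starting from the dead root, a live successor becomes dead only once all of its predecessors are dead, and successors that keep a live predecessor are where the real pass rewrites PHI operands to undef. Plain C++ with hypothetical block names; it deliberately omits the dominator-tree step (DT->getDescendants) and the critical-edge splitting done by the pass.

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Toy CFG: block -> successors. "deadroot" plays the role of the target of
  // the dead edge of a foldable conditional branch.
  std::map<std::string, std::vector<std::string>> Succ = {
      {"entry", {"live", "deadroot"}},
      {"deadroot", {"a"}},
      {"a", {"join"}},
      {"live", {"join"}},
      {"join", {}}};

  // Derive predecessor lists from the successor lists.
  std::map<std::string, std::vector<std::string>> Pred;
  for (const auto &P : Succ)
    for (const auto &S : P.second)
      Pred[S].push_back(P.first);

  std::set<std::string> Dead;
  std::vector<std::string> Work{"deadroot"};
  while (!Work.empty()) {
    std::string B = Work.back();
    Work.pop_back();
    if (!Dead.insert(B).second)
      continue;
    for (const auto &S : Succ[B]) {
      bool AllPredDead = true;
      for (const auto &P : Pred[S])
        if (!Dead.count(P)) {
          AllPredDead = false;
          break;
        }
      if (AllPredDead)
        Work.push_back(S); // S is unreachable once every predecessor is dead
      // else: S stays live; the real pass instead rewrites S's PHI operands
      // that come from dead predecessors to undef.
    }
  }

  for (const auto &B : Dead)
    std::cout << B << " is dead\n"; // prints: a, deadroot
  return 0;
}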
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index d51e034..235aaaa 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -532,7 +532,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue))
+ if (!SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE))
continue;
// Computing the value outside of the loop brings no benefit if :
@@ -1479,8 +1480,14 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
if (IndVar->getType()->isPointerTy()
&& !IVCount->getType()->isPointerTy()) {
+ // IVOffset will be the new GEP offset that is interpreted by GEP as a
+ // signed value. IVCount on the other hand represents the loop trip count,
+ // which is an unsigned value. FindLoopCounter only allows induction
+ // variables that have a positive unit stride of one. This means we don't
+ // have to handle the case of negative offsets (yet) and just need to zero
+ // extend IVCount.
Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
- const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
+ const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
// Expand the code for the iteration count.
assert(SE->isLoopInvariant(IVOffset, L) &&
@@ -1492,7 +1499,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
// We could handle pointer IVs other than i8*, but we need to compensate for
// gep index scaling. See canExpandBackedgeTakenCount comments.
- assert(SE->getSizeOfExpr(
+ assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
&& "unit stride pointer IV must be i8*");
@@ -1506,9 +1513,10 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
//
// Valid Cases: (1) both integers is most common; (2) both may be pointers
- // for simple memset-style loops; (3) IVInit is an integer and IVCount is a
- // pointer may occur when enable-iv-rewrite generates a canonical IV on top
- // of case #2.
+ // for simple memset-style loops.
+ //
+ // IVInit integer and IVCount pointer would only occur if a canonical IV
+ // were generated on top of case #2, which is not expected.
const SCEV *IVLimit = 0;
// For unit stride, IVCount = Start + BECount with 2's complement overflow.
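The switch from getTruncateOrSignExtend to getTruncateOrZeroExtend above is easiest to see with a deliberately narrow width. A minimal sketch (plain C++, hypothetical numbers, two's complement assumed; not part of the patch): the trip count is an unsigned quantity, so sign-extending a large count would turn it into a negative GEP offset, while zero-extending preserves it.

#include <cstdint>
#include <iostream>

int main() {
  uint8_t IVCount = 200;                   // unsigned trip count, fits in 8 bits
  int64_t SExt = (int64_t)(int8_t)IVCount; // sign-extend: -56, wrong as a GEP offset
  int64_t ZExt = (int64_t)IVCount;         // zero-extend: 200, the intended offset
  std::cout << "sext: " << SExt << ", zext: " << ZExt << "\n";
  return 0;
}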
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 0b8906d..b3ec2fc 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -827,7 +827,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
return false;
}
-
/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
/// load instruction, eliminate it by replacing it with a PHI node. This is an
/// important optimization that encourages jump threading, and needs to be run
@@ -842,6 +841,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (LoadBB->getSinglePredecessor())
return false;
+ // If the load is defined in a landing pad, it can't be partially redundant,
+ // because the edges between the invoke and the landing pad cannot have other
+ // instructions between them.
+ if (LoadBB->isLandingPad())
+ return false;
+
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 07d991b..952b76b 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -314,7 +314,7 @@ bool NclPopcountRecognize::preliminaryScreen() {
if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
return false;
- // Counting population are usually conducted by few arithmetic instrutions.
+  // Counting population is usually conducted by a few arithmetic instructions.
// Such instructions can be easilly "absorbed" by vacant slots in a
// non-compact loop. Therefore, recognizing popcount idiom only makes sense
// in a compact loop.
@@ -953,6 +953,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = 0;
+ unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
+
// If we're allowed to form a memset, and the stored value would be acceptable
// for memset, use it.
if (SplatValue && TLI->has(LibFunc::memset) &&
@@ -961,8 +963,10 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
PatternValue = 0;
- } else if (TLI->has(LibFunc::memset_pattern16) &&
+ } else if (DestAS == 0 &&
+ TLI->has(LibFunc::memset_pattern16) &&
(PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+    // Don't create memset_pattern16s for pointers in non-default address spaces.
// It looks like we can use PatternValue!
SplatValue = 0;
} else {
@@ -978,20 +982,20 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, "loop-idiom");
+ Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
- unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
Value *BasePtr =
- Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
Preheader->getTerminator());
-
if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
deleteIfDeadInstruction(BasePtr, *SE, TLI);
@@ -1002,27 +1006,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ Type *IntPtr = Builder.getIntPtrTy(TD, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
SCEV::FlagNUW);
- if (StoreSize != 1)
+ if (StoreSize != 1) {
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
+ }
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
CallInst *NewCall;
- if (SplatValue)
- NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
- else {
+ if (SplatValue) {
+ NewCall = Builder.CreateMemSet(BasePtr,
+ SplatValue,
+ NumBytes,
+ StoreAlignment);
+ } else {
+    // Everything is emitted in the default address space.
+ Type *Int8PtrTy = DestInt8PtrTy;
+
Module *M = TheStore->getParent()->getParent()->getParent();
Value *MSP = M->getOrInsertFunction("memset_pattern16",
Builder.getVoidTy(),
- Builder.getInt8PtrTy(),
- Builder.getInt8PtrTy(), IntPtr,
+ Int8PtrTy,
+ Int8PtrTy,
+ IntPtr,
(void*)0);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
@@ -1032,7 +1044,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(16);
- Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
}
@@ -1108,17 +1120,17 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(SI->getContext());
- BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+ Type *IntPtrTy = Builder.getIntPtrTy(TD, SI->getPointerAddressSpace());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
SCEV::FlagNUW);
if (StoreSize != 1)
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
SCEV::FlagNUW);
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
CallInst *NewCall =
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
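Both hunks above expand the stored byte count as (BECount + 1) * StoreSize in the pointer-sized integer type of the destination's address space. A small worked sketch with hypothetical values (plain C++, illustration only; not part of the patch): a loop whose backedge-taken count is 99 runs its body 100 times, so storing 4 bytes per iteration yields a 400-byte memset/memcpy length.

#include <cstdint>
#include <iostream>

int main() {
  uint64_t BECount = 99;   // backedge-taken count; the body runs BECount + 1 times
  uint64_t StoreSize = 4;  // bytes stored per iteration
  uint64_t NumBytes = (BECount + 1) * StoreSize;
  std::cout << "memset/memcpy length: " << NumBytes << " bytes\n"; // 400
  return 0;
}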
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
new file mode 100644
index 0000000..335af81
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -0,0 +1,1184 @@
+//===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop reroller.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reroll"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+STATISTIC(NumRerolledLoops, "Number of rerolled loops");
+
+static cl::opt<unsigned>
+MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
+ cl::desc("The maximum increment for loop rerolling"));
+
+// This loop re-rolling transformation aims to transform loops like this:
+//
+// int foo(int a);
+// void bar(int *x) {
+// for (int i = 0; i < 500; i += 3) {
+// foo(i);
+// foo(i+1);
+// foo(i+2);
+// }
+// }
+//
+// into a loop like this:
+//
+// void bar(int *x) {
+// for (int i = 0; i < 500; ++i)
+// foo(i);
+// }
+//
+// It does this by looking for loops that, besides the latch code, are composed
+// of isomorphic DAGs of instructions, with each DAG rooted at some increment
+// to the induction variable, and where each DAG is isomorphic to the DAG
+// rooted at the induction variable (excepting the sub-DAGs which root the
+// other induction-variable increments). In other words, we're looking for loop
+// bodies of the form:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// f(%iv)
+// %iv.1 = add %iv, 1 <-- a root increment
+// f(%iv.1)
+// %iv.2 = add %iv, 2 <-- a root increment
+// f(%iv.2)
+// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
+// f(%iv.scale_m_1)
+// ...
+// %iv.next = add %iv, scale
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// where each f(i) is a set of instructions that, collectively, are a function
+// only of i (and other loop-invariant values).
+//
+// As a special case, we can also reroll loops like this:
+//
+// int foo(int);
+// void bar(int *x) {
+// for (int i = 0; i < 500; ++i) {
+// x[3*i] = foo(0);
+// x[3*i+1] = foo(0);
+// x[3*i+2] = foo(0);
+// }
+// }
+//
+// into this:
+//
+// void bar(int *x) {
+// for (int i = 0; i < 1500; ++i)
+// x[i] = foo(0);
+// }
+//
+// in which case, we're looking for inputs like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// %scaled.iv = mul %iv, scale
+// f(%scaled.iv)
+// %scaled.iv.1 = add %scaled.iv, 1
+// f(%scaled.iv.1)
+// %scaled.iv.2 = add %scaled.iv, 2
+// f(%scaled.iv.2)
+// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
+// f(%scaled.iv.scale_m_1)
+// ...
+// %iv.next = add %iv, 1
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+
+namespace {
+ class LoopReroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopReroll() : LoopPass(ID) {
+ initializeLoopRerollPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+protected:
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+
+ typedef SmallVector<Instruction *, 16> SmallInstructionVector;
+ typedef SmallSet<Instruction *, 16> SmallInstructionSet;
+
+  // A chain of isomorphic instructions, identified by a single-use PHI,
+ // representing a reduction. Only the last value may be used outside the
+ // loop.
+ struct SimpleLoopReduction {
+ SimpleLoopReduction(Instruction *P, Loop *L)
+ : Valid(false), Instructions(1, P) {
+ assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
+ add(L);
+ }
+
+ bool valid() const {
+ return Valid;
+ }
+
+ Instruction *getPHI() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.front();
+ }
+
+ Instruction *getReducedValue() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.back();
+ }
+
+ Instruction *get(size_t i) const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions[i+1];
+ }
+
+ Instruction *operator [] (size_t i) const { return get(i); }
+
+ // The size, ignoring the initial PHI.
+ size_t size() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.size()-1;
+ }
+
+ typedef SmallInstructionVector::iterator iterator;
+ typedef SmallInstructionVector::const_iterator const_iterator;
+
+ iterator begin() {
+ assert(Valid && "Using invalid reduction");
+ return llvm::next(Instructions.begin());
+ }
+
+ const_iterator begin() const {
+ assert(Valid && "Using invalid reduction");
+ return llvm::next(Instructions.begin());
+ }
+
+ iterator end() { return Instructions.end(); }
+ const_iterator end() const { return Instructions.end(); }
+
+ protected:
+ bool Valid;
+ SmallInstructionVector Instructions;
+
+ void add(Loop *L);
+ };
+
+ // The set of all reductions, and state tracking of possible reductions
+ // during loop instruction processing.
+ struct ReductionTracker {
+ typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
+
+ // Add a new possible reduction.
+ void addSLR(SimpleLoopReduction &SLR) {
+ PossibleReds.push_back(SLR);
+ }
+
+    // Set up to track possible reductions corresponding to the provided
+    // rerolling scale. Only reductions with a number of non-PHI instructions
+    // that is divisible by the scale are considered. Three instruction sets
+ // are filled in:
+ // - A set of all possible instructions in eligible reductions.
+ // - A set of all PHIs in eligible reductions
+ // - A set of all reduced values (last instructions) in eligible reductions.
+ void restrictToScale(uint64_t Scale,
+ SmallInstructionSet &PossibleRedSet,
+ SmallInstructionSet &PossibleRedPHISet,
+ SmallInstructionSet &PossibleRedLastSet) {
+ PossibleRedIdx.clear();
+ PossibleRedIter.clear();
+ Reds.clear();
+
+ for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
+ if (PossibleReds[i].size() % Scale == 0) {
+ PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
+ PossibleRedPHISet.insert(PossibleReds[i].getPHI());
+
+ PossibleRedSet.insert(PossibleReds[i].getPHI());
+ PossibleRedIdx[PossibleReds[i].getPHI()] = i;
+ for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
+ JE = PossibleReds[i].end(); J != JE; ++J) {
+ PossibleRedSet.insert(*J);
+ PossibleRedIdx[*J] = i;
+ }
+ }
+ }
+
+ // The functions below are used while processing the loop instructions.
+
+ // Are the two instructions both from reductions, and furthermore, from
+ // the same reduction?
+ bool isPairInSame(Instruction *J1, Instruction *J2) {
+ DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
+ if (J1I != PossibleRedIdx.end()) {
+ DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
+ if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
+ return true;
+ }
+
+ return false;
+ }
+
+ // The two provided instructions, the first from the base iteration, and
+ // the second from iteration i, form a matched pair. If these are part of
+ // a reduction, record that fact.
+ void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
+ if (PossibleRedIdx.count(J1)) {
+ assert(PossibleRedIdx.count(J2) &&
+ "Recording reduction vs. non-reduction instruction?");
+
+ PossibleRedIter[J1] = 0;
+ PossibleRedIter[J2] = i;
+
+ int Idx = PossibleRedIdx[J1];
+ assert(Idx == PossibleRedIdx[J2] &&
+ "Recording pair from different reductions?");
+ Reds.insert(Idx);
+ }
+ }
+
+ // The functions below can be called after we've finished processing all
+ // instructions in the loop, and we know which reductions were selected.
+
+ // Is the provided instruction the PHI of a reduction selected for
+ // rerolling?
+ bool isSelectedPHI(Instruction *J) {
+ if (!isa<PHINode>(J))
+ return false;
+
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ if (cast<Instruction>(J) == PossibleReds[i].getPHI())
+ return true;
+ }
+
+ return false;
+ }
+
+ bool validateSelected();
+ void replaceSelected();
+
+ protected:
+ // The vector of all possible reductions (for any scale).
+ SmallReductionVector PossibleReds;
+
+ DenseMap<Instruction *, int> PossibleRedIdx;
+ DenseMap<Instruction *, int> PossibleRedIter;
+ DenseSet<int> Reds;
+ };
+
+ void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
+ void collectPossibleReductions(Loop *L,
+ ReductionTracker &Reductions);
+ void collectInLoopUserSet(Loop *L,
+ const SmallInstructionVector &Roots,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+ void collectInLoopUserSet(Loop *L,
+ Instruction * Root,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+ bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
+ Instruction *&IV,
+ SmallInstructionVector &LoopIncs);
+ bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV,
+ SmallVector<SmallInstructionVector, 32> &Roots,
+ SmallInstructionSet &AllRoots,
+ SmallInstructionVector &LoopIncs);
+ bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
+ ReductionTracker &Reductions);
+ };
+}
+
+char LoopReroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
+
+Pass *llvm::createLoopRerollPass() {
+ return new LoopReroll;
+}
+
+// Returns true if the provided instruction is used outside the given loop.
+// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
+// non-loop blocks to be outside the loop.
+static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
+ for (Value::use_iterator UI = I->use_begin(),
+ UIE = I->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!L->contains(User))
+ return true;
+ }
+
+ return false;
+}
+
+// Collect the list of loop induction variables with respect to which it might
+// be possible to reroll the loop.
+void LoopReroll::collectPossibleIVs(Loop *L,
+ SmallInstructionVector &PossibleIVs) {
+ BasicBlock *Header = L->getHeader();
+ for (BasicBlock::iterator I = Header->begin(),
+ IE = Header->getFirstInsertionPt(); I != IE; ++I) {
+ if (!isa<PHINode>(I))
+ continue;
+ if (!I->getType()->isIntegerTy())
+ continue;
+
+ if (const SCEVAddRecExpr *PHISCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
+ if (PHISCEV->getLoop() != L)
+ continue;
+ if (!PHISCEV->isAffine())
+ continue;
+ if (const SCEVConstant *IncSCEV =
+ dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
+ if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
+ continue;
+ if (IncSCEV->getValue()->uge(MaxInc))
+ continue;
+
+ DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
+ *PHISCEV << "\n");
+ PossibleIVs.push_back(I);
+ }
+ }
+ }
+}
+
+// Add the remainder of the reduction-variable chain to the instruction vector
+// (the initial PHINode has already been added). If successful, the object is
+// marked as valid.
+void LoopReroll::SimpleLoopReduction::add(Loop *L) {
+ assert(!Valid && "Cannot add to an already-valid chain");
+
+ // The reduction variable must be a chain of single-use instructions
+ // (including the PHI), except for the last value (which is used by the PHI
+ // and also outside the loop).
+ Instruction *C = Instructions.front();
+
+ do {
+ C = cast<Instruction>(*C->use_begin());
+ if (C->hasOneUse()) {
+ if (!C->isBinaryOp())
+ return;
+
+ if (!(isa<PHINode>(Instructions.back()) ||
+ C->isSameOperationAs(Instructions.back())))
+ return;
+
+ Instructions.push_back(C);
+ }
+ } while (C->hasOneUse());
+
+ if (Instructions.size() < 2 ||
+ !C->isSameOperationAs(Instructions.back()) ||
+ C->use_begin() == C->use_end())
+ return;
+
+ // C is now the (potential) last instruction in the reduction chain.
+ for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end();
+ UI != UIE; ++UI) {
+ // The only in-loop user can be the initial PHI.
+ if (L->contains(cast<Instruction>(*UI)))
+      if (cast<Instruction>(*UI) != Instructions.front())
+ return;
+ }
+
+ Instructions.push_back(C);
+ Valid = true;
+}
+
+// Collect the vector of possible reduction variables.
+void LoopReroll::collectPossibleReductions(Loop *L,
+ ReductionTracker &Reductions) {
+ BasicBlock *Header = L->getHeader();
+ for (BasicBlock::iterator I = Header->begin(),
+ IE = Header->getFirstInsertionPt(); I != IE; ++I) {
+ if (!isa<PHINode>(I))
+ continue;
+ if (!I->getType()->isSingleValueType())
+ continue;
+
+ SimpleLoopReduction SLR(I, L);
+ if (!SLR.valid())
+ continue;
+
+ DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
+ SLR.size() << " chained instructions)\n");
+ Reductions.addSLR(SLR);
+ }
+}
+
+// Collect the set of all users of the provided root instruction. This set of
+// users contains not only the direct users of the root instruction, but also
+// all users of those users, and so on. There are two exceptions:
+//
+// 1. Instructions in the set of excluded instructions are never added to the
+//    use set (even if they are users). This is used, for example, to keep
+//    root increments out of the use set of the primary IV.
+//
+// 2. Instructions in the set of final instructions are added to the use set
+// if they are users, but their users are not added. This is used, for
+// example, to prevent a reduction update from forcing all later reduction
+// updates into the use set.
+void LoopReroll::collectInLoopUserSet(Loop *L,
+ Instruction *Root, const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users) {
+ SmallInstructionVector Queue(1, Root);
+ while (!Queue.empty()) {
+ Instruction *I = Queue.pop_back_val();
+ if (!Users.insert(I).second)
+ continue;
+
+ if (!Final.count(I))
+ for (Value::use_iterator UI = I->use_begin(),
+ UIE = I->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ // Ignore "wrap-around" uses to PHIs of this loop's header.
+ if (PN->getIncomingBlock(UI) == L->getHeader())
+ continue;
+ }
+
+ if (L->contains(User) && !Exclude.count(User)) {
+ Queue.push_back(User);
+ }
+ }
+
+ // We also want to collect single-user "feeder" values.
+ for (User::op_iterator OI = I->op_begin(),
+ OIE = I->op_end(); OI != OIE; ++OI) {
+ if (Instruction *Op = dyn_cast<Instruction>(*OI))
+ if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
+ !Final.count(Op))
+ Queue.push_back(Op);
+ }
+ }
+}
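A minimal standalone sketch of the collection rule documented above, on a made-up def-use graph (plain C++; value names are hypothetical and this is not the LLVM use-list API, nor part of the patch): walk users transitively from a root, never enter Exclude nodes, and add Final nodes without following their users. The real function additionally pulls in single-use operand "feeders", which this sketch omits.

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Toy def-use graph: value name -> names of its users.
  std::map<std::string, std::vector<std::string>> Users = {
      {"root", {"a", "excluded"}},
      {"a", {"b", "final"}},
      {"b", {}},
      {"final", {"past_final"}},
      {"excluded", {"c"}},
      {"past_final", {}},
      {"c", {}}};
  std::set<std::string> Exclude = {"excluded"};
  std::set<std::string> Final = {"final"};

  std::set<std::string> Collected;
  std::vector<std::string> Queue = {"root"};
  while (!Queue.empty()) {
    std::string V = Queue.back();
    Queue.pop_back();
    if (!Collected.insert(V).second)
      continue; // already visited
    if (Final.count(V))
      continue; // added, but its users are not followed
    for (const auto &U : Users[V])
      if (!Exclude.count(U))
        Queue.push_back(U);
  }

  for (const auto &V : Collected)
    std::cout << V << "\n"; // prints: a, b, final, root
  return 0;
}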
+
+// Collect all of the users of all of the provided root instructions (combined
+// into a single set).
+void LoopReroll::collectInLoopUserSet(Loop *L,
+ const SmallInstructionVector &Roots,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users) {
+ for (SmallInstructionVector::const_iterator I = Roots.begin(),
+ IE = Roots.end(); I != IE; ++I)
+ collectInLoopUserSet(L, *I, Exclude, Final, Users);
+}
+
+static bool isSimpleLoadStore(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isSimple();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isSimple();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return !MI->isVolatile();
+ return false;
+}
+
+// Recognize loops that are set up like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// %scaled.iv = mul %iv, scale
+// f(%scaled.iv)
+// %scaled.iv.1 = add %scaled.iv, 1
+// f(%scaled.iv.1)
+// %scaled.iv.2 = add %scaled.iv, 2
+// f(%scaled.iv.2)
+// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
+// f(%scaled.iv.scale_m_1)
+// ...
+// %iv.next = add %iv, 1
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs.
+bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
+ Instruction *&IV,
+ SmallInstructionVector &LoopIncs) {
+ // This is a special case: here we're looking for all uses (except for
+ // the increment) to be multiplied by a common factor. The increment must
+ // be by one. This is to capture loops like:
+ // for (int i = 0; i < 500; ++i) {
+ // foo(3*i); foo(3*i+1); foo(3*i+2);
+ // }
+ if (RealIV->getNumUses() != 2)
+ return false;
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
+ Instruction *User1 = cast<Instruction>(*RealIV->use_begin()),
+ *User2 = cast<Instruction>(*llvm::next(RealIV->use_begin()));
+ if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
+ return false;
+ const SCEVAddRecExpr *User1SCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User1)),
+ *User2SCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2));
+ if (!User1SCEV || !User1SCEV->isAffine() ||
+ !User2SCEV || !User2SCEV->isAffine())
+ return false;
+
+ // We assume below that User1 is the scale multiply and User2 is the
+ // increment. If this can't be true, then swap them.
+ if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) {
+ std::swap(User1, User2);
+ std::swap(User1SCEV, User2SCEV);
+ }
+
+ if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE))
+ return false;
+ assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
+ "Invalid non-unit step for multiplicative scaling");
+ LoopIncs.push_back(User2);
+
+ if (const SCEVConstant *MulScale =
+ dyn_cast<SCEVConstant>(User1SCEV->getStepRecurrence(*SE))) {
+ // Make sure that both the start and step have the same multiplier.
+ if (RealIVSCEV->getStart()->getType() != MulScale->getType())
+ return false;
+ if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) !=
+ User1SCEV->getStart())
+ return false;
+
+ ConstantInt *MulScaleCI = MulScale->getValue();
+ if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc))
+ return false;
+ Scale = MulScaleCI->getZExtValue();
+ IV = User1;
+ } else
+ return false;
+
+ DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n");
+ return true;
+}
+
+// Collect all root increments with respect to the provided induction variable
+// (normally the PHI, but sometimes a multiply). A root increment is an
+// instruction, normally an add, with a positive constant less than Scale. In a
+// rerollable loop, each of these increments is the root of an instruction
+// graph isomorphic to the others. Also, we collect the final induction
+// increment (the increment equal to the Scale), and its users in LoopIncs.
+bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
+ Instruction *IV,
+ SmallVector<SmallInstructionVector, 32> &Roots,
+ SmallInstructionSet &AllRoots,
+ SmallInstructionVector &LoopIncs) {
+ for (Value::use_iterator UI = IV->use_begin(),
+ UIE = IV->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!SE->isSCEVable(User->getType()))
+ continue;
+ if (User->getType() != IV->getType())
+ continue;
+ if (!L->contains(User))
+ continue;
+ if (hasUsesOutsideLoop(User, L))
+ continue;
+
+ if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
+ SE->getSCEV(User), SE->getSCEV(IV)))) {
+ uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
+ if (Idx > 0 && Idx < Scale) {
+ Roots[Idx-1].push_back(User);
+ AllRoots.insert(User);
+ } else if (Idx == Scale && Inc > 1) {
+ LoopIncs.push_back(User);
+ }
+ }
+ }
+
+ if (Roots[0].empty())
+ return false;
+ bool AllSame = true;
+ for (unsigned i = 1; i < Scale-1; ++i)
+ if (Roots[i].size() != Roots[0].size()) {
+ AllSame = false;
+ break;
+ }
+
+ if (!AllSame)
+ return false;
+
+ return true;
+}
+
+// Validate the selected reductions. All iterations must have an isomorphic
+// part of the reduction chain and, for non-associative reductions, the chain
+// entries must appear in order.
+bool LoopReroll::ReductionTracker::validateSelected() {
+ // For a non-associative reduction, the chain entries must appear in order.
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ int PrevIter = 0, BaseCount = 0, Count = 0;
+ for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
+ JE = PossibleReds[i].end(); J != JE; ++J) {
+ // Note that all instructions in the chain must have been found because
+ // all instructions in the function must have been assigned to some
+ // iteration.
+ int Iter = PossibleRedIter[*J];
+ if (Iter != PrevIter && Iter != PrevIter + 1 &&
+ !PossibleReds[i].getReducedValue()->isAssociative()) {
+ DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
+ *J << "\n");
+ return false;
+ }
+
+ if (Iter != PrevIter) {
+ if (Count != BaseCount) {
+ DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
+ " reduction use count " << Count <<
+ " is not equal to the base use count " <<
+ BaseCount << "\n");
+ return false;
+ }
+
+ Count = 0;
+ }
+
+ ++Count;
+ if (Iter == 0)
+ ++BaseCount;
+
+ PrevIter = Iter;
+ }
+ }
+
+ return true;
+}
+
+// For all selected reductions, remove all parts except those in the first
+// iteration (and the PHI). Replace outside uses of the reduced value with uses
+// of the first-iteration reduced value (in other words, reroll the selected
+// reductions).
+void LoopReroll::ReductionTracker::replaceSelected() {
+ // Fixup reductions to refer to the last instruction associated with the
+ // first iteration (not the last).
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ int j = 0;
+ for (int e = PossibleReds[i].size(); j != e; ++j)
+ if (PossibleRedIter[PossibleReds[i][j]] != 0) {
+ --j;
+ break;
+ }
+
+ // Replace users with the new end-of-chain value.
+ SmallInstructionVector Users;
+ for (Value::use_iterator UI =
+ PossibleReds[i].getReducedValue()->use_begin(),
+ UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI)
+ Users.push_back(cast<Instruction>(*UI));
+
+ for (SmallInstructionVector::iterator J = Users.begin(),
+ JE = Users.end(); J != JE; ++J)
+ (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
+ PossibleReds[i][j]);
+ }
+}
+
+// Reroll the provided loop with respect to the provided induction variable.
+// Generally, we're looking for a loop like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// f(%iv)
+// %iv.1 = add %iv, 1 <-- a root increment
+// f(%iv.1)
+// %iv.2 = add %iv, 2 <-- a root increment
+// f(%iv.2)
+// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
+// f(%iv.scale_m_1)
+// ...
+// %iv.next = add %iv, scale
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
+// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
+// be intermixed with each other. The restriction imposed by this algorithm is
+// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
+// etc. be the same.
+//
+// First, we collect the use set of %iv, excluding the other increment roots.
+// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
+// times, having collected the use set of f(%iv.(i+1)), during which we:
+// - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
+// the next unmatched instruction in f(%iv.(i+1)).
+// - Ensure that both matched instructions don't have any external users
+// (with the exception of last-in-chain reduction instructions).
+// - Track the (aliasing) write set, and other side effects, of all
+// instructions that belong to future iterations that come before the matched
+// instructions. If the matched instructions read from that write set, then
+// f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
+// f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
+// if any of these future instructions had side effects (could not be
+//   speculatively executed), and so do the matched instructions, then we
+//   cannot reorder those side-effect-producing instructions, and rerolling
+// fails.
+//
+// Finally, we make sure that all loop instructions are either loop increment
+// roots, simple latch code, parts of validated reductions, part of
+// f(%iv), or part of some f(%iv.i). If all of that is true (and all reductions
+// have been validated), then we reroll the loop.
+bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
+ const SCEV *IterCount,
+ ReductionTracker &Reductions) {
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
+ uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
+ getValue()->getZExtValue();
+ // The collection of loop increment instructions.
+ SmallInstructionVector LoopIncs;
+ uint64_t Scale = Inc;
+
+ // The effective induction variable, IV, is normally also the real induction
+ // variable. When we're dealing with a loop like:
+ // for (int i = 0; i < 500; ++i)
+ // x[3*i] = ...;
+ // x[3*i+1] = ...;
+ // x[3*i+2] = ...;
+ // then the real IV is still i, but the effective IV is (3*i).
+ Instruction *RealIV = IV;
+ if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
+ return false;
+
+ assert(Scale <= MaxInc && "Scale is too large");
+ assert(Scale > 1 && "Scale must be at least 2");
+
+ // The set of increment instructions for each increment value.
+ SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
+ SmallInstructionSet AllRoots;
+ if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
+ return false;
+
+ DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
+ *RealIV << "\n");
+
+ // An array of just the possible reductions for this scale factor. When we
+ // collect the set of all users of some root instructions, these reduction
+ // instructions are treated as 'final' (their uses are not considered).
+ // This is important because we don't want the root use set to search down
+ // the reduction chain.
+ SmallInstructionSet PossibleRedSet;
+ SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
+ Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
+ PossibleRedLastSet);
+
+ // We now need to check for equivalence of the use graph of each root with
+ // that of the primary induction variable (excluding the roots). Our goal
+ // here is not to solve the full graph isomorphism problem, but rather to
+ // catch common cases without a lot of work. As a result, we will assume
+ // that the relative order of the instructions in each unrolled iteration
+ // is the same (although we will not make an assumption about how the
+ // different iterations are intermixed). Note that while the order must be
+ // the same, the instructions may not be in the same basic block.
+ SmallInstructionSet Exclude(AllRoots);
+ Exclude.insert(LoopIncs.begin(), LoopIncs.end());
+
+ DenseSet<Instruction *> BaseUseSet;
+ collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);
+
+ DenseSet<Instruction *> AllRootUses;
+ std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);
+
+ bool MatchFailed = false;
+ for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
+ DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
+ collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
+ PossibleRedSet, RootUseSet);
+
+ DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
+ " vs. iteration increment " << (i+1) <<
+ " use set size: " << RootUseSet.size() << "\n");
+
+ if (BaseUseSet.size() != RootUseSet.size()) {
+ MatchFailed = true;
+ break;
+ }
+
+ // In addition to regular aliasing information, we need to look for
+ // instructions from later (future) iterations that have side effects
+ // preventing us from reordering them past other instructions with side
+ // effects.
+ bool FutureSideEffects = false;
+ AliasSetTracker AST(*AA);
+
+ // The map between instructions in f(%iv.(i+1)) and f(%iv).
+ DenseMap<Value *, Value *> BaseMap;
+
+ assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
+ for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
+ JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
+ if (cast<Instruction>(J1) == RealIV)
+ continue;
+ if (cast<Instruction>(J1) == IV)
+ continue;
+ if (!BaseUseSet.count(J1))
+ continue;
+ if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
+ continue;
+
+ while (J2 != JE && (!RootUseSet.count(J2) ||
+ std::find(Roots[i].begin(), Roots[i].end(), J2) !=
+ Roots[i].end())) {
+ // As we iterate through the instructions, instructions that don't
+ // belong to previous iterations (or the base case), must belong to
+ // future iterations. We want to track the alias set of writes from
+ // previous iterations.
+ if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
+ !AllRootUses.count(J2)) {
+ if (J2->mayWriteToMemory())
+ AST.add(J2);
+
+ // Note: This is specifically guarded by a check on isa<PHINode>,
+ // which while a valid (somewhat arbitrary) micro-optimization, is
+ // needed because otherwise isSafeToSpeculativelyExecute returns
+ // false on PHI nodes.
+ if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
+ FutureSideEffects = true;
+ }
+
+ ++J2;
+ }
+
+ if (!J1->isSameOperationAs(J2)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << "\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // Make sure that this instruction, which is in the use set of this
+ // root instruction, does not also belong to the base set or the set of
+ // some previous root instruction.
+ if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (prev. case overlap)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // Make sure that we don't alias with any instruction in the alias set
+ // tracker. If we do, then we depend on a future iteration, and we
+ // can't reroll.
+ if (J2->mayReadFromMemory()) {
+ for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
+ K != KE && !MatchFailed; ++K) {
+ if (K->aliasesUnknownInst(J2, *AA)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (depends on future store)\n");
+ MatchFailed = true;
+ break;
+ }
+ }
+ }
+
+      // If we've passed an instruction from a future iteration that may have
+ // side effects, and this instruction might also, then we can't reorder
+ // them, and this matching fails. As an exception, we allow the alias
+ // set tracker to handle regular (simple) load/store dependencies.
+ if (FutureSideEffects &&
+ ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
+ (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 <<
+ " (side effects prevent reordering)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // For instructions that are part of a reduction, if the operation is
+ // associative, then don't bother matching the operands (because we
+ // already know that the instructions are isomorphic, and the order
+ // within the iteration does not matter). For non-associative reductions,
+ // we do need to match the operands, because we need to reject
+ // out-of-order instructions within an iteration!
+ // For example (assume floating-point addition), we need to reject this:
+ // x += a[i]; x += b[i];
+ // x += a[i+1]; x += b[i+1];
+ // x += b[i+2]; x += a[i+2];
+ bool InReduction = Reductions.isPairInSame(J1, J2);
+
+ if (!(InReduction && J1->isAssociative())) {
+        bool Swapped = false, SomeOpMatched = false;
+ for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
+ Value *Op2 = J2->getOperand(j);
+
+ // If this is part of a reduction (and the operation is not
+          // associative), then we match all operands, but not those that are
+ // part of the reduction.
+ if (InReduction)
+ if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
+ if (Reductions.isPairInSame(J2, Op2I))
+ continue;
+
+ DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
+ if (BMI != BaseMap.end())
+ Op2 = BMI->second;
+ else if (std::find(Roots[i].begin(), Roots[i].end(),
+ (Instruction*) Op2) != Roots[i].end())
+ Op2 = IV;
+
+ if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
+ // If we've not already decided to swap the matched operands, and
+ // we've not already matched our first operand (note that we could
+ // have skipped matching the first operand because it is part of a
+ // reduction above), and the instruction is commutative, then try
+ // the swapped match.
+ if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
+ J1->getOperand(!j) == Op2) {
+ Swapped = true;
+ } else {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (operand " << j << ")\n");
+ MatchFailed = true;
+ break;
+ }
+ }
+
+ SomeOpMatched = true;
+ }
+ }
+
+ if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
+ (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (uses outside loop)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ if (!MatchFailed)
+ BaseMap.insert(std::pair<Value *, Value *>(J2, J1));
+
+ AllRootUses.insert(J2);
+ Reductions.recordPair(J1, J2, i+1);
+
+ ++J2;
+ }
+ }
+
+ if (MatchFailed)
+ return false;
+
+ DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
+ *RealIV << "\n");
+
+ DenseSet<Instruction *> LoopIncUseSet;
+ collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
+ SmallInstructionSet(), LoopIncUseSet);
+ DEBUG(dbgs() << "LRR: Loop increment set size: " <<
+ LoopIncUseSet.size() << "\n");
+
+ // Make sure that all instructions in the loop have been included in some
+ // use set.
+ for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
+ J != JE; ++J) {
+ if (isa<DbgInfoIntrinsic>(J))
+ continue;
+ if (cast<Instruction>(J) == RealIV)
+ continue;
+ if (cast<Instruction>(J) == IV)
+ continue;
+ if (BaseUseSet.count(J) || AllRootUses.count(J) ||
+ (LoopIncUseSet.count(J) && (J->isTerminator() ||
+ isSafeToSpeculativelyExecute(J, DL))))
+ continue;
+
+ if (AllRoots.count(J))
+ continue;
+
+ if (Reductions.isSelectedPHI(J))
+ continue;
+
+ DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
+ " unprocessed instruction found: " << *J << "\n");
+ MatchFailed = true;
+ break;
+ }
+
+ if (MatchFailed)
+ return false;
+
+ DEBUG(dbgs() << "LRR: all instructions processed from " <<
+ *RealIV << "\n");
+
+ if (!Reductions.validateSelected())
+ return false;
+
+ // At this point, we've validated the rerolling, and we're committed to
+ // making changes!
+
+ Reductions.replaceSelected();
+
+ // Remove instructions associated with non-base iterations.
+ for (BasicBlock::reverse_iterator J = Header->rbegin();
+ J != Header->rend();) {
+ if (AllRootUses.count(&*J)) {
+ Instruction *D = &*J;
+ DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
+ D->eraseFromParent();
+ continue;
+ }
+
+ ++J;
+ }
+
+ // Insert the new induction variable.
+ const SCEV *Start = RealIVSCEV->getStart();
+ if (Inc == 1)
+ Start = SE->getMulExpr(Start,
+ SE->getConstant(Start->getType(), Scale));
+ const SCEVAddRecExpr *H =
+ cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
+ SE->getConstant(RealIVSCEV->getType(), 1),
+ L, SCEV::FlagAnyWrap));
+ { // Limit the lifetime of SCEVExpander.
+ SCEVExpander Expander(*SE, "reroll");
+ PHINode *NewIV =
+ cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
+ Header->begin()));
+ for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
+ JE = BaseUseSet.end(); J != JE; ++J)
+ (*J)->replaceUsesOfWith(IV, NewIV);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
+ if (LoopIncUseSet.count(BI)) {
+ const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+ if (Inc == 1)
+ ICSCEV =
+ SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
+ Value *IC;
+ if (isa<SCEVConstant>(ICSCEV)) {
+ IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
+ } else {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, this);
+
+ IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
+ Preheader->getTerminator());
+ }
+
+ Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
+ Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
+ "exitcond");
+ BI->setCondition(Cond);
+
+ if (BI->getSuccessor(1) != Header)
+ BI->swapSuccessors();
+ }
+ }
+ }
+
+ SimplifyInstructionsInBlock(Header, DL, TLI);
+ DeleteDeadPHIs(Header, TLI);
+ ++NumRerolledLoops;
+ return true;
+}
+
+bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ AA = &getAnalysis<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTree>();
+
+ BasicBlock *Header = L->getHeader();
+ DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
+ "] Loop %" << Header->getName() << " (" <<
+ L->getNumBlocks() << " block(s))\n");
+
+ bool Changed = false;
+
+ // For now, we'll handle only single BB loops.
+ if (L->getNumBlocks() > 1)
+ return Changed;
+
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return Changed;
+
+ const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
+ const SCEV *IterCount =
+ SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
+ DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
+
+ // First, we need to find the induction variable with respect to which we can
+ // reroll (there may be several possible options).
+ SmallInstructionVector PossibleIVs;
+ collectPossibleIVs(L, PossibleIVs);
+
+ if (PossibleIVs.empty()) {
+ DEBUG(dbgs() << "LRR: No possible IVs found\n");
+ return Changed;
+ }
+
+ ReductionTracker Reductions;
+ collectPossibleReductions(L, Reductions);
+
+ // For each possible IV, collect the associated possible set of 'root' nodes
+ // (i+1, i+2, etc.).
+ for (SmallInstructionVector::iterator I = PossibleIVs.begin(),
+ IE = PossibleIVs.end(); I != IE; ++I)
+ if (reroll(*I, L, Header, IterCount, Reductions)) {
+ Changed = true;
+ break;
+ }
+
+ return Changed;
+}
+
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 14cb979..eff5268 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1170,6 +1170,13 @@ public:
/// may be used.
bool AllFixupsOutsideLoop;
+ /// RigidFormula is set to true to guarantee that this use will be associated
+ /// with a single formula--the one that initially matched. Some SCEV
+ /// expressions cannot be expanded. This allows LSR to consider the registers
+ /// used by those expressions without the need to expand them later after
+ /// changing the formula.
+ bool RigidFormula;
+
/// WidestFixupType - This records the widest use type for any fixup using
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
@@ -1188,6 +1195,7 @@ public:
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
+ RigidFormula(false),
WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
@@ -1214,6 +1222,9 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
+ if (!Formulae.empty() && RigidFormula)
+ return false;
+
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
@@ -1433,7 +1444,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
}
case LSRUse::ICmpZero:
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
+ // Therefore, return 0 in case F.Scale == -1.
return F.Scale != -1;
case LSRUse::Basic:
@@ -2943,7 +2954,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2986,6 +2997,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
/// and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ // Mark uses whose expressions cannot be expanded.
+ if (!isSafeToExpand(S, SE))
+ LU.RigidFormula = true;
+
Formula F;
F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
@@ -4353,6 +4368,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
+ if (LU.RigidFormula)
+ return LF.OperandValToReplace;
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 80d060b..08ac38d 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -49,12 +49,17 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) {
+ LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+ CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+ UserAllowPartial = (P != -1) ||
+ (UnrollAllowPartial.getNumOccurrences() > 0);
+ UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
+ UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -75,7 +80,11 @@ namespace {
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
+ bool CurrentRuntime;
+ bool UserCount; // CurrentCount is user-specified.
bool UserThreshold; // CurrentThreshold is user-specified.
+ bool UserAllowPartial; // CurrentAllowPartial is user-specified.
+ bool UserRuntime; // CurrentRuntime is user-specified.
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -110,8 +119,9 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
- return new LoopUnroll(Threshold, Count, AllowPartial);
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
+ int Runtime) {
+ return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
}
/// ApproximateLoopSize - Approximate the size of the loop.
@@ -145,16 +155,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
+ TargetTransformInfo::UnrollingPreferences UP;
+ UP.Threshold = CurrentThreshold;
+ UP.OptSizeThreshold = OptSizeUnrollThreshold;
+ UP.Count = CurrentCount;
+ UP.Partial = CurrentAllowPartial;
+ UP.Runtime = CurrentRuntime;
+ TTI.getUnrollingPreferences(L, UP);
+
// Determine the current unrolling threshold. While this is normally set
// from UnrollThreshold, it is overridden to a smaller value if the current
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
- unsigned Threshold = CurrentThreshold;
+ unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
if (!UserThreshold &&
Header->getParent()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize))
- Threshold = OptSizeUnrollThreshold;
+ Threshold = UP.OptSizeThreshold;
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
@@ -167,11 +185,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
+
+ bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime;
+
// Use a default unroll-count if the user doesn't specify a value
// and the trip count is a run-time value. The default is different
// for run-time or compile-time trip count loops.
- unsigned Count = CurrentCount;
- if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ unsigned Count = UserCount ? CurrentCount : UP.Count;
+ if (Runtime && Count == 0 && TripCount == 0)
Count = UnrollRuntimeCount;
if (Count == 0) {
@@ -204,7 +225,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
+ bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
+ if (!AllowPartial && !(Runtime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
@@ -215,7 +237,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
while (Count != 0 && TripCount%Count != 0)
Count--;
}
- else if (UnrollRuntime) {
+ else if (Runtime) {
// Reduce unroll count to be a lower power-of-two value
while (Count != 0 && Size > Threshold) {
Count >>= 1;
@@ -231,7 +253,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM))
return false;
return true;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 59aff31..c4ebfd5 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -212,8 +212,6 @@ namespace {
Instruction *InsertPt);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- void RemoveBlockIfDead(BasicBlock *BB,
- std::vector<Instruction*> &Worklist, Loop *l);
void RemoveLoopFromHierarchy(Loop *L);
bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
BasicBlock **LoopExit = 0);
@@ -946,114 +944,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
-/// information, and remove any dead successors it has.
-///
-void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
- std::vector<Instruction*> &Worklist,
- Loop *L) {
- if (pred_begin(BB) != pred_end(BB)) {
- // This block isn't dead, since an edge to BB was just removed, see if there
- // are any easy simplifications we can do now.
- if (BasicBlock *Pred = BB->getSinglePredecessor()) {
- // If it has one pred, fold phi nodes in BB.
- while (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
- ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
-
- // If this is the header of a loop and the only pred is the latch, we now
- // have an unreachable loop.
- if (Loop *L = LI->getLoopFor(BB))
- if (loopHeader == BB && L->contains(Pred)) {
- // Remove the branch from the latch to the header block, this makes
- // the header dead, which will make the latch dead (because the header
- // dominates the latch).
- LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
- Pred->getTerminator()->eraseFromParent();
- new UnreachableInst(BB->getContext(), Pred);
-
- // The loop is now broken, remove it from LI.
- RemoveLoopFromHierarchy(L);
-
- // Reprocess the header, which now IS dead.
- RemoveBlockIfDead(BB, Worklist, L);
- return;
- }
-
- // If pred ends in a uncond branch, add uncond branch to worklist so that
- // the two blocks will get merged.
- if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
- if (BI->isUnconditional())
- Worklist.push_back(BI);
- }
- return;
- }
-
- DEBUG(dbgs() << "Nuking dead block: " << *BB);
-
- // Remove the instructions in the basic block from the worklist.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- RemoveFromWorklist(I, Worklist);
-
- // Anything that uses the instructions in this basic block should have their
- // uses replaced with undefs.
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I->getType()->isVoidTy())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- }
-
- // If this is the edge to the header block for a loop, remove the loop and
- // promote all subloops.
- if (Loop *BBLoop = LI->getLoopFor(BB)) {
- if (BBLoop->getLoopLatch() == BB) {
- RemoveLoopFromHierarchy(BBLoop);
- if (currentLoop == BBLoop) {
- currentLoop = 0;
- redoLoop = false;
- }
- }
- }
-
- // Remove the block from the loop info, which removes it from any loops it
- // was in.
- LI->removeBlock(BB);
-
- // Remove phi node entries in successors for this block.
- TerminatorInst *TI = BB->getTerminator();
- SmallVector<BasicBlock*, 4> Succs;
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- Succs.push_back(TI->getSuccessor(i));
- TI->getSuccessor(i)->removePredecessor(BB);
- }
-
- // Unique the successors, remove anything with multiple uses.
- array_pod_sort(Succs.begin(), Succs.end());
- Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
-
- // Remove the basic block, including all of the instructions contained in it.
- LPM->deleteSimpleAnalysisValue(BB, L);
- BB->eraseFromParent();
- // Remove successor blocks here that are not dead, so that we know we only
- // have dead blocks in this list. Nondead blocks have a way of becoming dead,
- // then getting removed before we revisit them, which is badness.
- //
- for (unsigned i = 0; i != Succs.size(); ++i)
- if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
- // One exception is loop headers. If this block was the preheader for a
- // loop, then we DO want to visit the loop so the loop gets deleted.
- // We know that if the successor is a loop header, that this loop had to
- // be the preheader: the case where this was the latch block was handled
- // above and headers can only have two predecessors.
- if (!LI->isLoopHeader(Succs[i])) {
- Succs.erase(Succs.begin()+i);
- --i;
- }
- }
-
- for (unsigned i = 0, e = Succs.size(); i != e; ++i)
- RemoveBlockIfDead(Succs[i], Worklist, L);
-}
-
/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
/// become unwrapped, either because the backedge was deleted, or because the
/// edge into the header was removed. If the edge into the header from the
@@ -1262,23 +1152,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
continue;
}
- if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
- // Conditional branch. Turn it into an unconditional branch, then
- // remove dead blocks.
- continue; // FIXME: Enable.
-
- DEBUG(dbgs() << "Folded branch: " << *BI);
- BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
- BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
- DeadSucc->removePredecessor(BI->getParent(), true);
- Worklist.push_back(BranchInst::Create(LiveSucc, BI));
- LPM->deleteSimpleAnalysisValue(BI, L);
- BI->eraseFromParent();
- RemoveFromWorklist(BI, Worklist);
- ++NumSimplify;
-
- RemoveBlockIfDead(DeadSucc, Worklist, L);
- }
continue;
}
}
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 8f61ffd..9912d3d 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -170,14 +170,17 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// pessimize the llvm optimizer.
//
// Since we don't have perfect knowledge here, make some assumptions: assume
- // the maximum GPR width is the same size as the pointer size and assume that
- // this width can be stored. If so, check to see whether we will end up
- // actually reducing the number of stores used.
+ // the maximum GPR width is the same size as the largest legal integer
+ // size. If so, check to see whether we will end up actually reducing the
+ // number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned NumPointerStores = Bytes/TD.getPointerSize();
+ unsigned MaxIntSize = TD.getLargestLegalIntTypeSize();
+ if (MaxIntSize == 0)
+ MaxIntSize = 1;
+ unsigned NumPointerStores = Bytes / MaxIntSize;
// Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+ unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
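+ // Worked example (illustrative only, not part of the heuristic itself):
+ // with an 8-byte largest legal integer type, a 10-byte range is modeled as
+ // one wide store plus two byte-wide stores.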
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
diff --git a/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index de3f09c..15cee44 100644
--- a/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -1,4 +1,4 @@
-//===---- MipsOptimizeMathLibCalls.cpp - Optimize math lib calls. ----===//
+//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,76 +7,60 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass does an IR transformation which enables the backend to emit native
-// math instructions.
+// This pass tries to partially inline the fast path of well-known library
+// functions, such as using square-root instructions for cases where sqrt()
+// does not need to set errno.
//
//===----------------------------------------------------------------------===//
-#include "MipsTargetMachine.h"
+#define DEBUG_TYPE "partially-inline-libcalls"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
-static cl::opt<bool> DisableOpt("disable-mips-math-optimization",
- cl::init(false),
- cl::desc("MIPS: Disable math lib call "
- "optimization."), cl::Hidden);
-
namespace {
- class MipsOptimizeMathLibCalls : public FunctionPass {
+ class PartiallyInlineLibCalls : public FunctionPass {
public:
static char ID;
- MipsOptimizeMathLibCalls(MipsTargetMachine &TM_) :
- FunctionPass(ID), TM(TM_) {}
-
- virtual const char *getPassName() const {
- return "MIPS: Optimize calls to math library functions.";
+ PartiallyInlineLibCalls() :
+ FunctionPass(ID) {
+ initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
virtual bool runOnFunction(Function &F);
private:
/// Optimize calls to sqrt.
bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
- BasicBlock &CurrBB,
- Function::iterator &BB);
-
- const TargetMachine &TM;
+ BasicBlock &CurrBB, Function::iterator &BB);
};
- char MipsOptimizeMathLibCalls::ID = 0;
+ char PartiallyInlineLibCalls::ID = 0;
}
-FunctionPass *llvm::createMipsOptimizeMathLibCalls(MipsTargetMachine &TM) {
- return new MipsOptimizeMathLibCalls(TM);
-}
+INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
+ "Partially inline calls to library functions", false, false)
-void MipsOptimizeMathLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
+void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetTransformInfo>();
FunctionPass::getAnalysisUsage(AU);
}
-bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
- if (DisableOpt)
- return false;
-
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
-
- if (Subtarget.inMips16Mode())
- return false;
-
+bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
bool Changed = false;
Function::iterator CurrBB;
- const TargetLibraryInfo *LibInfo = &getAnalysis<TargetLibraryInfo>();
-
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
CurrBB = BB++;
@@ -88,25 +72,18 @@ bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
if (!Call || !(CalledFunc = Call->getCalledFunction()))
continue;
- LibFunc::Func LibFunc;
- Attribute A = CalledFunc->getAttributes()
- .getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
-
- // Skip if function has "use-soft-float" attribute.
- if ((A.isStringAttribute() && (A.getValueAsString() == "true")) ||
- TM.Options.UseSoftFloat)
- continue;
-
// Skip if function either has local linkage or is not a known library
// function.
+ LibFunc::Func LibFunc;
if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
- !LibInfo->getLibFunc(CalledFunc->getName(), LibFunc))
+ !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
continue;
switch (LibFunc) {
case LibFunc::sqrtf:
case LibFunc::sqrt:
- if (optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
+ if (TTI->haveFastSqrt(Call->getType()) &&
+ optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
break;
continue;
default:
@@ -121,10 +98,10 @@ bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
return Changed;
}
-bool MipsOptimizeMathLibCalls::optimizeSQRT(CallInst *Call,
- Function *CalledFunc,
- BasicBlock &CurrBB,
- Function::iterator &BB) {
+bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
+ Function *CalledFunc,
+ BasicBlock &CurrBB,
+ Function::iterator &BB) {
// There is no need to change the IR, since backend will emit sqrt
// instruction if the call has already been marked read-only.
if (Call->onlyReadsMemory())
@@ -173,3 +150,7 @@ bool MipsOptimizeMathLibCalls::optimizeSQRT(CallInst *Call,
BB = JoinBB;
return true;
}
+
+FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
+ return new PartiallyInlineLibCalls();
+}
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 5c55143..9f3fc83 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -733,9 +733,9 @@ class AllocaPromoter : public LoadAndStorePromoter {
SmallVector<DbgValueInst *, 4> DVIs;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
AllocaInst &AI, DIBuilder &DIB)
- : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
+ : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
void run(const SmallVectorImpl<Instruction*> &Insts) {
// Retain the debug information attached to the alloca for use when
@@ -762,9 +762,30 @@ public:
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &Insts) const {
+ Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->getOperand(0) == &AI;
- return cast<StoreInst>(I)->getPointerOperand() == &AI;
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+
+ // Only used to detect cycles, which will be rare and quickly found as
+ // we're walking up a chain of defs rather than down through uses.
+ SmallPtrSet<Value *, 4> Visited;
+
+ do {
+ if (Ptr == &AI)
+ return true;
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
+ Ptr = BCI->getOperand(0);
+ else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEPI->getPointerOperand();
+ else
+ return false;
+
+ } while (Visited.insert(Ptr));
+
+ return false;
}
virtual void updateDebugInfo(Instruction *Inst) const {
@@ -917,6 +938,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
Type *Ty = 0;
+ bool IgnoreNonIntegralTypes = false;
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
@@ -925,29 +947,37 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
- else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
UserTy = SI->getValueOperand()->getType();
- else
- return 0; // Bail if we have weird uses.
+ } else {
+ IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
+ continue;
+ }
if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
// this for split integer operations where we want to use the type of the
- // entity causing the split.
- if (ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+ // entity causing the split. Also skip if the type is not a byte width
+ // multiple.
+ if (ITy->getBitWidth() % 8 != 0 ||
+ ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
continue;
// If we have found an integer type use covering the alloca, use that
- // regardless of the other types, as integers are often used for a
- // "bucket
- // of bits" type.
+ // regardless of the other types, as integers are often used for
+ // a "bucket of bits" type.
+ //
+ // NB: This *must* be the only return from inside the loop so that the
+ // order of slices doesn't impact the computed type.
return ITy;
+ } else if (IgnoreNonIntegralTypes) {
+ continue;
}
if (Ty && Ty != UserTy)
- return 0;
+ IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
Ty = UserTy;
}
@@ -1431,6 +1461,10 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
return false;
+ // We can convert pointers to integers and vice-versa. Same for vectors
+ // of pointers and integers.
+ OldTy = OldTy->getScalarType();
+ NewTy = NewTy->getScalarType();
if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
if (NewTy->isPointerTy() && OldTy->isPointerTy())
return true;
@@ -1449,21 +1483,53 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
- Type *Ty) {
- assert(canConvertValue(DL, V->getType(), Ty) &&
- "Value not convertable to type");
- if (V->getType() == Ty)
+ Type *NewTy) {
+ Type *OldTy = V->getType();
+ assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+ if (OldTy == NewTy)
return V;
- if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
- if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
if (NewITy->getBitWidth() > OldITy->getBitWidth())
return IRB.CreateZExt(V, NewITy);
- if (V->getType()->isIntegerTy() && Ty->isPointerTy())
- return IRB.CreateIntToPtr(V, Ty);
- if (V->getType()->isPointerTy() && Ty->isIntegerTy())
- return IRB.CreatePtrToInt(V, Ty);
- return IRB.CreateBitCast(V, Ty);
+ // See if we need inttoptr for this type pair. A cast involving both scalars
+ // and vectors requires an additional bitcast.
+ if (OldTy->getScalarType()->isIntegerTy() &&
+ NewTy->getScalarType()->isPointerTy()) {
+ // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
+ if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+ return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+ NewTy);
+
+ // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
+ if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+ return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+ NewTy);
+
+ return IRB.CreateIntToPtr(V, NewTy);
+ }
+
+ // See if we need ptrtoint for this type pair. A cast involving both scalars
+ // and vectors requires an additional bitcast.
+ if (OldTy->getScalarType()->isPointerTy() &&
+ NewTy->getScalarType()->isIntegerTy()) {
+ // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
+ if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+ return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+ NewTy);
+
+ // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
+ if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+ return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+ NewTy);
+
+ return IRB.CreatePtrToInt(V, NewTy);
+ }
+
+ return IRB.CreateBitCast(V, NewTy);
}
/// \brief Test whether the given slice use can be promoted to a vector.
@@ -3364,12 +3430,12 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
}
static void enqueueUsersInWorklist(Instruction &I,
- SmallVectorImpl<Use *> &UseWorklist,
- SmallPtrSet<Use *, 8> &VisitedUses) {
+ SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSet<Instruction *, 8> &Visited) {
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
++UI)
- if (VisitedUses.insert(&UI.getUse()))
- UseWorklist.push_back(&UI.getUse());
+ if (Visited.insert(cast<Instruction>(*UI)))
+ Worklist.push_back(cast<Instruction>(*UI));
}
/// \brief Promote the allocas, using the best available technique.
@@ -3388,7 +3454,7 @@ bool SROA::promoteAllocas(Function &F) {
if (DT && !ForceSSAUpdater) {
DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, DL);
+ PromoteMemToReg(PromotableAllocas, *DT);
PromotableAllocas.clear();
return true;
}
@@ -3396,29 +3462,29 @@ bool SROA::promoteAllocas(Function &F) {
DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
SSAUpdater SSA;
DIBuilder DIB(*F.getParent());
- SmallVector<Instruction*, 64> Insts;
+ SmallVector<Instruction *, 64> Insts;
// We need a worklist to walk the uses of each alloca.
- SmallVector<Use *, 8> UseWorklist;
- SmallPtrSet<Use *, 8> VisitedUses;
+ SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
SmallVector<Instruction *, 32> DeadInsts;
for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
AllocaInst *AI = PromotableAllocas[Idx];
- UseWorklist.clear();
- VisitedUses.clear();
+ Insts.clear();
+ Worklist.clear();
+ Visited.clear();
- enqueueUsersInWorklist(*AI, UseWorklist, VisitedUses);
+ enqueueUsersInWorklist(*AI, Worklist, Visited);
- while (!UseWorklist.empty()) {
- Use *U = UseWorklist.pop_back_val();
- Instruction &I = *cast<Instruction>(U->getUser());
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
// FIXME: Currently the SSAUpdater infrastructure doesn't reason about
// lifetime intrinsics and so we strip them (and the bitcasts+GEPs
// leading to them) here. Eventually it should use them to optimize the
// scalar values produced.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end);
II->eraseFromParent();
@@ -3428,12 +3494,12 @@ bool SROA::promoteAllocas(Function &F) {
// Push the loads and stores we find onto the list. SROA will already
// have validated that all loads and stores are viable candidates for
// promotion.
- if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
assert(LI->getType() == AI->getAllocatedType());
Insts.push_back(LI);
continue;
}
- if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
Insts.push_back(SI);
continue;
@@ -3442,11 +3508,10 @@ bool SROA::promoteAllocas(Function &F) {
// For everything else, we know that only no-op bitcasts and GEPs will
// make it this far, just recurse through them and recall them for later
// removal.
- DeadInsts.push_back(&I);
- enqueueUsersInWorklist(I, UseWorklist, VisitedUses);
+ DeadInsts.push_back(I);
+ enqueueUsersInWorklist(*I, Worklist, Visited);
}
AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
- Insts.clear();
while (!DeadInsts.empty())
DeadInsts.pop_back_val()->eraseFromParent();
AI->eraseFromParent();
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
new file mode 100644
index 0000000..9bcd702
--- /dev/null
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -0,0 +1,479 @@
+//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileLoader transformation. This pass
+// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
+// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
+// profile information in the given profile.
+//
+// This pass generates branch weight annotations on the IR:
+//
+// - prof: Represents branch weights. This annotation is added to branches
+// to indicate the weights of each edge coming out of the branch.
+// The weight of each edge is the weight of the target block for
+// that edge. The weight of a block B is computed as the maximum
+// number of samples found in B.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sample-profile"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+// Command line option to specify the file to read samples from. This is
+// mainly used for debugging.
+static cl::opt<std::string> SampleProfileFile(
+ "sample-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+
+namespace {
+/// \brief Sample-based profile reader.
+///
+/// Each profile contains sample counts for all the functions
+/// executed. Inside each function, statements are annotated with the
+/// collected samples on all the instructions associated with that
+/// statement.
+///
+/// For this to produce meaningful data, the program needs to be
+/// compiled with some debug information (at minimum, line numbers:
+/// -gline-tables-only). Otherwise, it will be impossible to match IR
+/// instructions to the line numbers collected by the profiler.
+///
+/// From the profile file, we are interested in collecting the
+/// following information:
+///
+/// * A list of functions included in the profile (mangled names).
+///
+/// * For each function F:
+/// 1. The total number of samples collected in F.
+///
+/// 2. The samples collected at each line in F. To provide some
+/// protection against source code shuffling, line numbers should
+/// be relative to the start of the function.
+class SampleProfile {
+public:
+ SampleProfile(StringRef F) : Profiles(0), Filename(F) {}
+
+ void dump();
+ void loadText();
+ void loadNative() { llvm_unreachable("not implemented"); }
+ bool emitAnnotations(Function &F);
+ void printFunctionProfile(raw_ostream &OS, StringRef FName);
+ void dumpFunctionProfile(StringRef FName);
+
+protected:
+ typedef DenseMap<uint32_t, uint32_t> BodySampleMap;
+ typedef DenseMap<BasicBlock *, uint32_t> BlockWeightMap;
+
+ /// \brief Representation of the runtime profile for a function.
+ ///
+ /// This data structure contains the runtime profile for a given
+ /// function. It contains the total number of samples collected
+ /// in the function and a map of samples collected in every statement.
+ struct FunctionProfile {
+ /// \brief Total number of samples collected inside this function.
+ ///
+ /// Samples are cumulative, they include all the samples collected
+ /// inside this function and all its inlined callees.
+ unsigned TotalSamples;
+
+ /// \brief Total number of samples collected at the head of the function.
+ unsigned TotalHeadSamples;
+
+ /// \brief Map line offsets to collected samples.
+ ///
+ /// Each entry in this map contains the number of samples
+ /// collected at the corresponding line offset. All line locations
+ /// are an offset from the start of the function.
+ BodySampleMap BodySamples;
+
+ /// \brief Map basic blocks to their computed weights.
+ ///
+ /// The weight of a basic block is defined to be the maximum
+ /// of all the instruction weights in that block.
+ BlockWeightMap BlockWeights;
+ };
+
+ uint32_t getInstWeight(Instruction &I, unsigned FirstLineno,
+ BodySampleMap &BodySamples);
+ uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
+ BodySampleMap &BodySamples);
+
+ /// \brief Map every function to its associated profile.
+ ///
+ /// The profile of every function executed at runtime is collected
+ /// in the structure FunctionProfile. This maps function objects
+ /// to their corresponding profiles.
+ StringMap<FunctionProfile> Profiles;
+
+ /// \brief Path name to the file holding the profile data.
+ ///
+ /// The format of this file is defined by each profiler
+ /// independently. If possible, the profiler should have a text
+ /// version of the profile format to be used in constructing test
+ /// cases and debugging.
+ StringRef Filename;
+};
+
+/// \brief Loader class for text-based profiles.
+///
+/// This class defines a simple interface to read text files containing
+/// profiles. It keeps track of line number information and location of
+/// the file pointer. Users of this class are responsible for actually
+/// parsing the lines returned by the readLine function.
+///
+/// TODO - This does not really belong here. It is a generic text file
+/// reader. It should be moved to the Support library and made more general.
+class ExternalProfileTextLoader {
+public:
+ ExternalProfileTextLoader(StringRef F) : Filename(F) {
+ error_code EC;
+ EC = MemoryBuffer::getFile(Filename, Buffer);
+ if (EC)
+ report_fatal_error("Could not open profile file " + Filename + ": " +
+ EC.message());
+ FP = Buffer->getBufferStart();
+ Lineno = 0;
+ }
+
+ /// \brief Read a line from the mapped file.
+ StringRef readLine() {
+ size_t Length = 0;
+ const char *start = FP;
+ while (FP != Buffer->getBufferEnd() && *FP != '\n') {
+ Length++;
+ FP++;
+ }
+ if (FP != Buffer->getBufferEnd())
+ FP++;
+ Lineno++;
+ return StringRef(start, Length);
+ }
+
+ /// \brief Return true if we've reached EOF.
+ bool atEOF() const { return FP == Buffer->getBufferEnd(); }
+
+ /// \brief Report a parse error message and stop compilation.
+ void reportParseError(Twine Msg) const {
+ report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n");
+ }
+
+private:
+ /// \brief Memory buffer holding the text file.
+ OwningPtr<MemoryBuffer> Buffer;
+
+ /// \brief Current position into the memory buffer.
+ const char *FP;
+
+ /// \brief Current line number.
+ int64_t Lineno;
+
+ /// \brief Path name of the profile file.
+ StringRef Filename;
+};
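+
+// Illustrative use of the loader (this simply mirrors how loadText() below
+// drives it; it is not additional functionality):
+//
+//   ExternalProfileTextLoader Loader("profile.txt");
+//   while (!Loader.atEOF()) {
+//     StringRef Line = Loader.readLine();
+//     // ... parse Line ...
+//   }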
+
+/// \brief Sample profile pass.
+///
+/// This pass reads profile data from the file specified by
+/// -sample-profile-file and annotates every affected function with the
+/// profile information found in that file.
+class SampleProfileLoader : public FunctionPass {
+public:
+ // Class identification, replacement for typeinfo
+ static char ID;
+
+ SampleProfileLoader(StringRef Name = SampleProfileFile)
+ : FunctionPass(ID), Profiler(0), Filename(Name) {
+ initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ void dump() { Profiler->dump(); }
+
+ virtual const char *getPassName() const { return "Sample profile pass"; }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+protected:
+ /// \brief Profile reader object.
+ OwningPtr<SampleProfile> Profiler;
+
+ /// \brief Name of the profile file to load.
+ StringRef Filename;
+};
+}
+
+/// \brief Print the function profile for \p FName on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param FName Name of the function to print.
+void SampleProfile::printFunctionProfile(raw_ostream &OS, StringRef FName) {
+ FunctionProfile FProfile = Profiles[FName];
+ OS << "Function: " << FName << ", " << FProfile.TotalSamples << ", "
+ << FProfile.TotalHeadSamples << ", " << FProfile.BodySamples.size()
+ << " sampled lines\n";
+ for (BodySampleMap::const_iterator SI = FProfile.BodySamples.begin(),
+ SE = FProfile.BodySamples.end();
+ SI != SE; ++SI)
+ OS << "\tline offset: " << SI->first
+ << ", number of samples: " << SI->second << "\n";
+ OS << "\n";
+}
+
+/// \brief Dump the function profile for \p FName.
+///
+/// \param FName Name of the function to print.
+void SampleProfile::dumpFunctionProfile(StringRef FName) {
+ printFunctionProfile(dbgs(), FName);
+}
+
+/// \brief Dump all the function profiles found.
+void SampleProfile::dump() {
+ for (StringMap<FunctionProfile>::const_iterator I = Profiles.begin(),
+ E = Profiles.end();
+ I != E; ++I)
+ dumpFunctionProfile(I->getKey());
+}
+
+/// \brief Load samples from a text file.
+///
+/// The file is divided in two segments:
+///
+/// Symbol table (represented with the string "symbol table")
+/// Number of symbols in the table
+/// symbol 1
+/// symbol 2
+/// ...
+/// symbol N
+///
+/// Function body profiles
+/// function1:total_samples:total_head_samples:number_of_locations
+/// location_offset_1: number_of_samples
+/// location_offset_2: number_of_samples
+/// ...
+/// location_offset_N: number_of_samples
+///
+/// Function names must be mangled in order for the profile loader to
+/// match them in the current translation unit.
+///
+/// Since this is a flat profile, a function that shows up more than
+/// once gets all its samples aggregated across all its instances.
+/// TODO - flat profiles are too imprecise to provide good optimization
+/// opportunities. Convert them to a context-sensitive profile.
+///
+/// This textual representation is useful to generate unit tests and
+/// for debugging purposes, but it should not be used to generate
+/// profiles for large programs, as the representation is extremely
+/// inefficient.
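+///
+/// As an illustration only (a made-up example, not something shipped with
+/// this pass), a profile for one function with the hypothetical mangled name
+/// _Z3foov, 1200 total samples, 10 head samples and two sampled lines could
+/// look like:
+///
+///   symbol table
+///   1
+///   _Z3foov
+///   _Z3foov:1200:10:2
+///   1: 800
+///   5: 400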
+void SampleProfile::loadText() {
+ ExternalProfileTextLoader Loader(Filename);
+
+ // Read the symbol table.
+ StringRef Line = Loader.readLine();
+ if (Line != "symbol table")
+ Loader.reportParseError("Expected 'symbol table', found " + Line);
+ int NumSymbols;
+ Line = Loader.readLine();
+ if (Line.getAsInteger(10, NumSymbols))
+ Loader.reportParseError("Expected a number, found " + Line);
+ for (int I = 0; I < NumSymbols; I++) {
+ StringRef FName = Loader.readLine();
+ FunctionProfile &FProfile = Profiles[FName];
+ FProfile.BodySamples.clear();
+ FProfile.TotalSamples = 0;
+ FProfile.TotalHeadSamples = 0;
+ }
+
+ // Read the profile of each function. Since each function may be
+ // mentioned more than once, and we are collecting flat profiles,
+ // accumulate samples as we parse them.
+ Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$");
+ Regex LineSample("^([0-9]+): ([0-9]+)$");
+ while (!Loader.atEOF()) {
+ SmallVector<StringRef, 4> Matches;
+ Line = Loader.readLine();
+ if (!HeadRE.match(Line, &Matches))
+ Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " +
+ Line);
+ assert(Matches.size() == 5);
+ StringRef FName = Matches[1];
+ unsigned NumSamples, NumHeadSamples, NumSampledLines;
+ Matches[2].getAsInteger(10, NumSamples);
+ Matches[3].getAsInteger(10, NumHeadSamples);
+ Matches[4].getAsInteger(10, NumSampledLines);
+ FunctionProfile &FProfile = Profiles[FName];
+ FProfile.TotalSamples += NumSamples;
+ FProfile.TotalHeadSamples += NumHeadSamples;
+ BodySampleMap &SampleMap = FProfile.BodySamples;
+ unsigned I;
+ for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) {
+ Line = Loader.readLine();
+ if (!LineSample.match(Line, &Matches))
+ Loader.reportParseError("Expected 'NUM: NUM', found " + Line);
+ assert(Matches.size() == 3);
+ unsigned LineOffset, NumSamples;
+ Matches[1].getAsInteger(10, LineOffset);
+ Matches[2].getAsInteger(10, NumSamples);
+ SampleMap[LineOffset] += NumSamples;
+ }
+
+ if (I < NumSampledLines)
+ Loader.reportParseError("Unexpected end of file");
+ }
+}
+
+/// \brief Get the weight for an instruction.
+///
+/// The "weight" of an instruction \p Inst is the number of samples
+/// collected on that instruction at runtime. To retrieve it, we
+/// need to compute the line number of \p Inst relative to the start of its
+/// function. We use \p FirstLineno to compute the offset. We then
+/// look up the samples collected for \p Inst using \p BodySamples.
+///
+/// \param Inst Instruction to query.
+/// \param FirstLineno Line number of the first instruction in the function.
+/// \param BodySamples Map of relative source line locations to samples.
+///
+/// \returns The profiled weight of \p Inst.
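+///
+/// For example (illustrative numbers only): if the function's first
+/// instruction is at source line 10 and \p Inst is at source line 12, the
+/// lookup key is 12 - 10 + 1 = 3, and the weight returned is the sample
+/// count recorded for offset 3 (or 0 if none was recorded).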
+uint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno,
+ BodySampleMap &BodySamples) {
+ unsigned LOffset = Inst.getDebugLoc().getLine() - FirstLineno + 1;
+ return BodySamples.lookup(LOffset);
+}
+
+/// \brief Compute the weight of a basic block.
+///
+/// The weight of basic block \p B is the maximum weight of all the
+/// instructions in B.
+///
+/// \param B The basic block to query.
+/// \param FirstLineno The line number for the first line in the
+/// function holding B.
+/// \param BodySamples The map containing all the samples collected in that
+/// function.
+///
+/// \returns The computed weight of B.
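+///
+/// For example (illustrative numbers only): a block whose instructions
+/// carry 5, 12 and 0 samples gets a weight of 12.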
+uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
+ BodySampleMap &BodySamples) {
+ // If we've computed B's weight before, return it.
+ Function *F = B->getParent();
+ FunctionProfile &FProfile = Profiles[F->getName()];
+ std::pair<BlockWeightMap::iterator, bool> Entry =
+ FProfile.BlockWeights.insert(std::make_pair(B, 0));
+ if (!Entry.second)
+ return Entry.first->second;
+
+ // Otherwise, compute and cache B's weight.
+ uint32_t Weight = 0;
+ for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
+ uint32_t InstWeight = getInstWeight(*I, FirstLineno, BodySamples);
+ if (InstWeight > Weight)
+ Weight = InstWeight;
+ }
+ Entry.first->second = Weight;
+ return Weight;
+}
+
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// For every branch instruction B in \p F, we compute the weight of the
+/// target block for each of the edges out of B. This is the weight
+/// that we associate with that branch.
+///
+/// TODO - This weight assignment will most likely be wrong if the
+/// target branch has more than two predecessors. This needs to be done
+/// using some form of flow propagation.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on B using the computed values.
+///
+/// \param F The function to query.
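+///
+/// As a sketch of the result (assuming the usual MDBuilder branch-weight
+/// encoding), a two-way branch whose successors have weights 80 and 20 is
+/// annotated roughly as:
+///
+///   br i1 %cmp, label %then, label %else, !prof !0
+///   ...
+///   !0 = metadata !{metadata !"branch_weights", i32 80, i32 20}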
+bool SampleProfile::emitAnnotations(Function &F) {
+ bool Changed = false;
+ FunctionProfile &FProfile = Profiles[F.getName()];
+ unsigned FirstLineno = inst_begin(F)->getDebugLoc().getLine();
+ MDBuilder MDB(F.getContext());
+
+ // Clear the block weights cache.
+ FProfile.BlockWeights.clear();
+
+ // When we find a branch instruction: For each edge E out of the branch,
+ // the weight of E is the weight of the target block.
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *B = I;
+ TerminatorInst *TI = B->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ continue;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ continue;
+
+ SmallVector<uint32_t, 4> Weights;
+ unsigned NSuccs = TI->getNumSuccessors();
+ for (unsigned I = 0; I < NSuccs; ++I) {
+ BasicBlock *Succ = TI->getSuccessor(I);
+ uint32_t Weight =
+ computeBlockWeight(Succ, FirstLineno, FProfile.BodySamples);
+ Weights.push_back(Weight);
+ }
+
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+char SampleProfileLoader::ID = 0;
+INITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader",
+ false, false)
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+ return Profiler->emitAnnotations(F);
+}
+
+bool SampleProfileLoader::doInitialization(Module &M) {
+ Profiler.reset(new SampleProfile(Filename));
+ Profiler->loadText();
+ return true;
+}
+
+FunctionPass *llvm::createSampleProfileLoaderPass() {
+ return new SampleProfileLoader(SampleProfileFile);
+}
+
+FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ return new SampleProfileLoader(Name);
+}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 758334d..857597e 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
- initializeBlockPlacementPass(Registry);
+ initializeSampleProfileLoaderPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
@@ -44,12 +44,14 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopInstSimplifyPass(Registry);
initializeLoopRotatePass(Registry);
initializeLoopStrengthReducePass(Registry);
+ initializeLoopRerollPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnswitchPass(Registry);
initializeLoopIdiomRecognizePass(Registry);
initializeLowerAtomicPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
+ initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
@@ -111,6 +113,10 @@ void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRotatePass());
}
+void LLVMAddLoopRerollPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopRerollPass());
+}
+
void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopUnrollPass());
}
@@ -123,6 +129,10 @@ void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createMemCpyOptPass());
}
+void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPartiallyInlineLibCallsPass());
+}
+
void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPromoteMemoryToRegisterPass());
}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 73b2edf..57b290e 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -963,7 +963,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
@@ -1426,7 +1426,7 @@ bool SROA::performPromotion(Function &F) {
if (Allocas.empty()) break;
if (HasDomTree)
- PromoteMemToReg(Allocas, *DT, TD);
+ PromoteMemToReg(Allocas, *DT);
else {
SSAUpdater SSA;
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 6d05640..8371f6d 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -66,161 +66,6 @@ FunctionPass *llvm::createCFGSimplificationPass() {
return new CFGSimplifyPass();
}
-/// changeToUnreachable - Insert an unreachable instruction before the specified
-/// instruction, making it and the rest of the code in the block dead.
-static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
- BasicBlock *BB = I->getParent();
- // Loop over all of the successors, removing BB's entry from any PHI
- // nodes.
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- (*SI)->removePredecessor(BB);
-
- // Insert a call to llvm.trap right before this. This turns the undefined
- // behavior into a hard fail instead of falling through into random code.
- if (UseLLVMTrap) {
- Function *TrapFn =
- Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
- CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
- CallTrap->setDebugLoc(I->getDebugLoc());
- }
- new UnreachableInst(I->getContext(), I);
-
- // All instructions after this are dead.
- BasicBlock::iterator BBI = I, BBE = BB->end();
- while (BBI != BBE) {
- if (!BBI->use_empty())
- BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
- BB->getInstList().erase(BBI++);
- }
-}
-
-/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II) {
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setAttributes(II->getAttributes());
- NewCall->setDebugLoc(II->getDebugLoc());
- II->replaceAllUsesWith(NewCall);
-
- // Follow the call by a branch to the normal destination.
- BranchInst::Create(II->getNormalDest(), II);
-
- // Update PHI nodes in the unwind destination
- II->getUnwindDest()->removePredecessor(II->getParent());
- II->eraseFromParent();
-}
-
-static bool markAliveBlocks(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 128> &Reachable) {
-
- SmallVector<BasicBlock*, 128> Worklist;
- Worklist.push_back(BB);
- Reachable.insert(BB);
- bool Changed = false;
- do {
- BB = Worklist.pop_back_val();
-
- // Do a quick scan of the basic block, turning any obviously unreachable
- // instructions into LLVM unreachable insts. The instruction combining pass
- // canonicalizes unreachable insts into stores to null or undef.
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
- if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
- if (CI->doesNotReturn()) {
- // If we found a call to a no-return function, insert an unreachable
- // instruction after it. Make sure there isn't *already* one there
- // though.
- ++BBI;
- if (!isa<UnreachableInst>(BBI)) {
- // Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(BBI, false);
- Changed = true;
- }
- break;
- }
- }
-
- // Store to undef and store to null are undefined and used to signal that
- // they should be changed to unreachable by passes that can't modify the
- // CFG.
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- // Don't touch volatile stores.
- if (SI->isVolatile()) continue;
-
- Value *Ptr = SI->getOperand(1);
-
- if (isa<UndefValue>(Ptr) ||
- (isa<ConstantPointerNull>(Ptr) &&
- SI->getPointerAddressSpace() == 0)) {
- changeToUnreachable(SI, true);
- Changed = true;
- break;
- }
- }
- }
-
- // Turn invokes that call 'nounwind' functions into ordinary calls.
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Value *Callee = II->getCalledValue();
- if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true);
- Changed = true;
- } else if (II->doesNotThrow()) {
- if (II->use_empty() && II->onlyReadsMemory()) {
- // jump to the normal destination branch.
- BranchInst::Create(II->getNormalDest(), II);
- II->getUnwindDest()->removePredecessor(II->getParent());
- II->eraseFromParent();
- } else
- changeToCall(II);
- Changed = true;
- }
- }
-
- Changed |= ConstantFoldTerminator(BB, true);
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.insert(*SI))
- Worklist.push_back(*SI);
- } while (!Worklist.empty());
- return Changed;
-}
-
-/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
-/// if they are in a dead cycle. Return true if a change was made, false
-/// otherwise.
-static bool removeUnreachableBlocksFromFn(Function &F) {
- SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = markAliveBlocks(F.begin(), Reachable);
-
- // If there are unreachable blocks in the CFG...
- if (Reachable.size() == F.size())
- return Changed;
-
- assert(Reachable.size() < F.size());
- NumSimpl += F.size()-Reachable.size();
-
- // Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references...
- for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
- if (Reachable.count(BB))
- continue;
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.count(*SI))
- (*SI)->removePredecessor(BB);
- BB->dropAllReferences();
- }
-
- for (Function::iterator I = ++F.begin(); I != F.end();)
- if (!Reachable.count(I))
- I = F.getBasicBlockList().erase(I);
- else
- ++I;
-
- return true;
-}
-
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
static bool mergeEmptyReturnBlocks(Function &F) {
@@ -325,7 +170,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
bool CFGSimplifyPass::runOnFunction(Function &F) {
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
- bool EverChanged = removeUnreachableBlocksFromFn(F);
+ bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
@@ -333,16 +178,16 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
if (!EverChanged) return false;
// iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
- // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // removeUnreachableBlocks is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
// avoid reruning iterativelySimplifyCFG if the second pass of
- // removeUnreachableBlocksFromFn doesn't do anything.
- if (!removeUnreachableBlocksFromFn(F))
+ // removeUnreachableBlocks doesn't do anything.
+ if (!removeUnreachableBlocks(F))
return true;
do {
EverChanged = iterativelySimplifyCFG(F, TTI, TD);
- EverChanged |= removeUnreachableBlocksFromFn(F);
+ EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
return true;
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index bb6f163..5045ff8 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -231,7 +231,7 @@ public:
StructurizeCFG() :
RegionPass(ID) {
- initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
}
using Pass::doInitialization;
@@ -244,6 +244,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
RegionPass::getAnalysisUsage(AU);
@@ -256,6 +257,7 @@ char StructurizeCFG::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(RegionInfo)
INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
@@ -321,21 +323,32 @@ Value *StructurizeCFG::invert(Value *Condition) {
if (match(Condition, m_Not(m_Value(Condition))))
return Condition;
- // Third: Check all the users for an invert
- BasicBlock *Parent = cast<Instruction>(Condition)->getParent();
- for (Value::use_iterator I = Condition->use_begin(),
- E = Condition->use_end(); I != E; ++I) {
+ if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
+ // Third: Check all the users for an invert
+ BasicBlock *Parent = Inst->getParent();
+ for (Value::use_iterator I = Condition->use_begin(),
+ E = Condition->use_end(); I != E; ++I) {
- Instruction *User = dyn_cast<Instruction>(*I);
- if (!User || User->getParent() != Parent)
- continue;
+ Instruction *User = dyn_cast<Instruction>(*I);
+ if (!User || User->getParent() != Parent)
+ continue;
+
+ if (match(*I, m_Not(m_Specific(Condition))))
+ return *I;
+ }
- if (match(*I, m_Not(m_Specific(Condition))))
- return *I;
+ // Last option: Create a new instruction
+ return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
}
- // Last option: Create a new instruction
- return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
+ if (Argument *Arg = dyn_cast<Argument>(Condition)) {
+ BasicBlock &EntryBlock = Arg->getParent()->getEntryBlock();
+ return BinaryOperator::CreateNot(Condition,
+ Arg->getName() + ".inv",
+ EntryBlock.getTerminator());
+ }
+
+ llvm_unreachable("Unhandled condition to invert");
}
/// \brief Build the condition for one edge
@@ -766,6 +779,20 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed,
handleLoops(false, LoopEnd);
}
+ // If the start of the loop is the entry block, we can't branch to it so
+ // insert a new dummy entry block.
+ Function *LoopFunc = LoopStart->getParent();
+ if (LoopStart == &LoopFunc->getEntryBlock()) {
+ LoopStart->setName("entry.orig");
+
+ BasicBlock *NewEntry =
+ BasicBlock::Create(LoopStart->getContext(),
+ "entry",
+ LoopFunc,
+ LoopStart);
+ BranchInst::Create(LoopStart, NewEntry);
+ }
+
// Create an extra loop end node
LoopEnd = needPrefix(false);
BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
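
// A small illustrative sketch (mine, not the patch) of the Argument case that
// invert() now handles: an Argument has no defining instruction, so the
// earliest legal place to materialise its negation is just before the entry
// block's terminator. The helper name is hypothetical.
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static Value *invertArgument(Argument *Arg) {
  BasicBlock &Entry = Arg->getParent()->getEntryBlock();
  return BinaryOperator::CreateNot(Arg, Arg->getName() + ".inv",
                                   Entry.getTerminator());
}
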
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index e17a416..12de9ee 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -248,7 +248,6 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
// If the edge isn't critical, then BB has a single successor or Succ has a
// single pred. Split the block.
- BasicBlock::iterator SplitPoint;
if (BasicBlock *SP = Succ->getSinglePredecessor()) {
// If the successor only has a single pred, split the top of the successor
// block.
@@ -401,8 +400,12 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i)
- PN->removeIncomingValue(Preds[i], false);
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // Explicitly check the BB index here to handle duplicates in Preds.
+ int Idx = PN->getBasicBlockIndex(Preds[i]);
+ if (Idx >= 0)
+ PN->removeIncomingValue(Idx, false);
+ }
} else {
// If the values coming into the block are not the same, we need a PHI.
// Create the new PHI node, insert it into NewBB at the end of the block
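
// Hypothetical helper (not in the patch) showing the defensive pattern the
// UpdatePHINodes hunk above adopts: when the same predecessor can appear more
// than once in Preds, look the index up first, because
// PHINode::removeIncomingValue(BB, ...) asserts once the block is no longer an
// incoming block.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void dropIncomingFrom(PHINode *PN, ArrayRef<BasicBlock *> Preds) {
  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
    int Idx = PN->getBasicBlockIndex(Preds[i]);
    if (Idx >= 0)                                  // already removed? skip it
      PN->removeIncomingValue(Idx, /*DeletePHIIfEmpty=*/false);
  }
}
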
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 8f3ff96..0e7f7f7 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
@@ -45,7 +44,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<LoopInfo>();
- AU.addPreserved<ProfileInfo>();
// No loop canonicalization guarantees are broken by this pass.
AU.addPreservedID(LoopSimplifyID);
@@ -213,10 +211,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
- ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
// If we have nothing to update, just return.
- if (DT == 0 && LI == 0 && PI == 0)
+ if (DT == 0 && LI == 0)
return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
@@ -369,9 +366,5 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
}
- // Update ProfileInfo if it is around.
- if (PI)
- PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
-
return NewBB;
}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 3648fd6..5afd6b8 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMTransformUtils
CmpInstAnalysis.cpp
CodeExtractor.cpp
DemoteRegToStack.cpp
+ GlobalStatus.cpp
InlineFunction.cpp
InstructionNamer.cpp
IntegerDivision.cpp
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 82013f9..6f00864 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -665,8 +665,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
TheSwitch->setCondition(call);
TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
// Remove redundant case
- SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1);
- TheSwitch->removeCase(ToBeRemoved);
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
}
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 9cbe15d..1da226b 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -266,8 +266,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
BasicBlock *CB;
BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
bool Iteration = true;
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ IRBuilder<>::InsertPointGuard Guard(Builder);
Value *PC = PBI->getCondition();
do {
@@ -298,7 +297,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
new UnreachableInst(CB->getContext(), CB);
} while (Iteration);
- Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
return true;
}
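
// A rough illustration (not from the commit) of the RAII helper the hunk above
// switches to: InsertPointGuard records the builder's insertion point and debug
// location on construction and restores both when it goes out of scope,
// replacing the manual SaveInsertBB/SaveInsertPt bookkeeping.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static void emitAtTemporaryPoint(IRBuilder<> &Builder, BasicBlock *BB) {
  IRBuilder<>::InsertPointGuard Guard(Builder); // saves the current point
  Builder.SetInsertPoint(BB);                   // move somewhere else
  // ... create instructions at the temporary location ...
}                                               // destructor restores the point
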
@@ -372,7 +370,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
/// Check whether \param BB is the merge block of a if-region. If yes, check
/// whether there exists an adjacent if-region upstream, the two if-regions
-/// contain identical instuctions and can be legally merged. \returns true if
+/// contain identical instructions and can be legally merged. \returns true if
/// the two if-regions are merged.
///
/// From:
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
new file mode 100644
index 0000000..5f0a563
--- /dev/null
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -0,0 +1,183 @@
+//===-- GlobalStatus.cpp - Compute status info for globals -----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+
+using namespace llvm;
+
+/// Return the stronger of the two ordering. If the two orderings are acquire
+/// and release, then return AcquireRelease.
+///
+static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ if (X == Acquire && Y == Release)
+ return AcquireRelease;
+ if (Y == Acquire && X == Release)
+ return AcquireRelease;
+ return (AtomicOrdering)std::max(X, Y);
+}
+
+/// It is safe to destroy a constant iff it is only used by constants itself.
+/// Note that constants cannot be cyclic, so this test is pretty easy to
+/// implement recursively.
+///
+bool llvm::isSafeToDestroyConstant(const Constant *C) {
+ if (isa<GlobalValue>(C))
+ return false;
+
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+ ++UI)
+ if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ if (!isSafeToDestroyConstant(CU))
+ return false;
+ } else
+ return false;
+ return true;
+}
+
+static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode *, 16> &PhiUsers) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType()))
+ return true;
+
+ if (analyzeGlobalAux(CE, GS, PhiUsers))
+ return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.IsLoaded = true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile())
+ return true;
+ GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V)
+ return true;
+
+ // Don't hack on volatile stores.
+ if (SI->isVolatile())
+ return true;
+
+ GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::Stored) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (GS.StoredType < GlobalStatus::StoredOnce) {
+ GS.StoredType = GlobalStatus::StoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::StoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ }
+ } else if (isa<BitCastInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<SelectInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PhiUsers.insert(PN)) // Not already visited.
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<CmpInst>(I)) {
+ GS.IsCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile())
+ return true;
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::Stored;
+ if (MTI->getArgOperand(1) == V)
+ GS.IsLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile())
+ return true;
+ GS.StoredType = GlobalStatus::Stored;
+ } else if (ImmutableCallSite C = I) {
+ if (!C.isCallee(UI))
+ return true;
+ GS.IsLoaded = true;
+ } else {
+ return true; // Any other non-load instruction might take address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
+ SmallPtrSet<const PHINode *, 16> PhiUsers;
+ return analyzeGlobalAux(V, GS, PhiUsers);
+}
+
+GlobalStatus::GlobalStatus()
+ : IsCompared(false), IsLoaded(false), StoredType(NotStored),
+ StoredOnceValue(0), AccessingFunction(0),
+ HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
+ Ordering(NotAtomic) {}
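
// A speculative usage sketch (not part of the commit) of how a client such as
// GlobalOpt might consume the new helper. Field and enum names follow the
// llvm/Transforms/Utils/GlobalStatus.h header that accompanies this file; the
// predicate itself is only an illustration.
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;

static bool neverWrittenAfterInit(const GlobalVariable *GV) {
  GlobalStatus GS;
  if (GlobalStatus::analyzeGlobal(GV, GS))
    return false;                 // the analysis gave up; stay conservative
  // At most re-stored with its own initializer, and never compared by address.
  return GS.StoredType <= GlobalStatus::InitializerStored && !GS.IsCompared;
}
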
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index dabb67b..d021bce 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -193,7 +193,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
CallInst *CI = dyn_cast<CallInst>(I);
// If this call cannot unwind, don't convert it to an invoke.
- if (!CI || CI->doesNotThrow())
+ // Inline asm calls cannot throw.
+ if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
continue;
// Convert this function call into an invoke instruction. First, split the
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 2d1b166..f15e8d5 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -55,7 +55,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
- std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
Loop *L;
@@ -82,11 +81,6 @@ namespace {
// Check the special guarantees that LCSSA makes.
assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
}
-
- /// inLoop - returns true if the given block is within the current loop
- bool inLoop(BasicBlock *B) const {
- return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
- }
};
}
@@ -129,11 +123,6 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
if (ExitBlocks.empty())
return false;
- // Speed up queries by creating a sorted vector of blocks.
- LoopBlocks.clear();
- LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
- array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
-
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, rewrite those uses.
bool MadeChange = false;
@@ -198,7 +187,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (PHINode *PN = dyn_cast<PHINode>(U))
UserBB = PN->getIncomingBlock(UI);
- if (InstBB != UserBB && !inLoop(UserBB))
+ if (InstBB != UserBB && !L->contains(UserBB))
UsesToRewrite.push_back(&UI.getUse());
}
@@ -244,7 +233,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
- if (!inLoop(*PI))
+ if (!L->contains(*PI))
UsesToRewrite.push_back(
&PN->getOperandUse(
PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
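
// Not part of the patch, just the idea behind it: Loop already answers block
// membership directly, so the sorted LoopBlocks vector plus binary_search was
// redundant bookkeeping. The helper name is illustrative.
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

static bool isUseOutsideLoop(const Loop *L, BasicBlock *DefBB, BasicBlock *UserBB) {
  return UserBB != DefBB && !L->contains(UserBB);
}
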
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 08e1808..2768041 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -16,10 +16,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
@@ -43,6 +43,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+
//===----------------------------------------------------------------------===//
// Local constant propagation.
//
@@ -193,33 +195,28 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
SwitchInst::CaseIt FirstCase = SI->case_begin();
- IntegersSubset& Case = FirstCase.getCaseValueEx();
- if (Case.isSingleNumber()) {
- // FIXME: Currently work with ConstantInt based numbers.
- Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
- Case.getSingleNumber(0).toConstantInt(),
- "cond");
-
- // Insert the new branch.
- BranchInst *NewBr = Builder.CreateCondBr(Cond,
- FirstCase.getCaseSuccessor(),
- SI->getDefaultDest());
- MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
- if (MD && MD->getNumOperands() == 3) {
- ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
- ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
- assert(SICase && SIDef);
- // The TrueWeight should be the weight for the single case of SI.
- NewBr->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).
- createBranchWeights(SICase->getValue().getZExtValue(),
- SIDef->getValue().getZExtValue()));
- }
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ FirstCase.getCaseValue(), "cond");
- // Delete the old switch.
- SI->eraseFromParent();
- return true;
+ // Insert the new branch.
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ if (MD && MD->getNumOperands() == 3) {
+ ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
+ assert(SICase && SIDef);
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(SICase->getValue().getZExtValue(),
+ SIDef->getValue().getZExtValue()));
}
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
}
return false;
}
@@ -415,7 +412,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
Instruction *Inst = BI++;
WeakVH BIHandle(BI);
- if (recursivelySimplifyInstruction(Inst, TD)) {
+ if (recursivelySimplifyInstruction(Inst, TD, TLI)) {
MadeChange = true;
if (BIHandle != BI)
BI = BB->begin();
@@ -515,11 +512,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
DT->changeImmediateDominator(DestBB, PredBBIDom);
DT->eraseNode(PredBB);
}
- ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
- if (PI) {
- PI->replaceAllUses(PredBB, DestBB);
- PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
- }
}
// Nuke BB.
PredBB->eraseFromParent();
@@ -533,7 +525,7 @@ static bool CanMergeValues(Value *First, Value *Second) {
}
/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into succ.
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
///
/// Assumption: Succ is the single successor for BB.
///
@@ -1053,7 +1045,11 @@ bool llvm::LowerDbgDeclare(Function &F) {
for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(),
E = Dbgs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
- if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ // If this is an alloca for a scalar variable, insert a dbg.value
+ // at each load and store to the alloca and erase the dbg.declare.
+ if (AI && !AI->isArrayAllocation()) {
+
// We only remove the dbg.declare intrinsic if all uses are
// converted to dbg.value intrinsics.
bool RemoveDDI = true;
@@ -1121,33 +1117,153 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
return true;
}
-bool llvm::removeUnreachableBlocks(Function &F) {
- SmallPtrSet<BasicBlock*, 16> Reachable;
+/// changeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
+ }
+ new UnreachableInst(I->getContext(), I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// changeToCall - Convert the specified invoke into a normal call.
+static void changeToCall(InvokeInst *II) {
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+}
+
+static bool markAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
SmallVector<BasicBlock*, 128> Worklist;
- Worklist.push_back(&F.getEntryBlock());
- Reachable.insert(&F.getEntryBlock());
+ Worklist.push_back(BB);
+ Reachable.insert(BB);
+ bool Changed = false;
do {
- BasicBlock *BB = Worklist.pop_back_val();
+ BB = Worklist.pop_back_val();
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(BBI, false);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // Store to undef and store to null are undefined and used to signal that
+ // they should be changed to unreachable by passes that can't modify the
+ // CFG.
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ changeToUnreachable(SI, true);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Value *Callee = II->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ changeToUnreachable(II, true);
+ Changed = true;
+ } else if (II->doesNotThrow()) {
+ if (II->use_empty() && II->onlyReadsMemory()) {
+ // jump to the normal destination branch.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ } else
+ changeToCall(II);
+ Changed = true;
+ }
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.insert(*SI))
Worklist.push_back(*SI);
} while (!Worklist.empty());
+ return Changed;
+}
+/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+bool llvm::removeUnreachableBlocks(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = markAliveBlocks(F.begin(), Reachable);
+
+ // If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
- return false;
+ return Changed;
assert(Reachable.size() < F.size());
- for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) {
- if (Reachable.count(I))
+ NumRemoved += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
continue;
- for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.count(*SI))
- (*SI)->removePredecessor(I);
- I->dropAllReferences();
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
}
- for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+ for (Function::iterator I = ++F.begin(); I != F.end();)
if (!Reachable.count(I))
I = F.getBasicBlockList().erase(I);
else
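
// A minimal caller sketch (mine, not the commit's): after this move the helper
// is a library-level utility, presumably declared in
// llvm/Transforms/Utils/Local.h, so any pass can invoke it directly.
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static bool stripDeadBlocks(Function &F) {
  // Folds obviously dead control flow, rewrites no-return calls, and erases
  // every block unreachable from the entry, including blocks in dead cycles.
  return removeUnreachableBlocks(F);
}
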
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index cb581b3..162807d 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -90,7 +90,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
// Move all definitions in the successor to the predecessor...
OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
- std::string OldName = BB->getName();
+ // OldName will be valid until erased.
+ StringRef OldName = BB->getName();
// Erase basic block from the function...
@@ -102,12 +103,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
}
}
LI->removeBlock(BB);
- BB->eraseFromParent();
// Inherit predecessor's name if it exists...
if (!OldName.empty() && !OnlyPred->hasName())
OnlyPred->setName(OldName);
+ BB->eraseFromParent();
+
return OnlyPred;
}
@@ -239,8 +241,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << "!\n");
}
- std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
-
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
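
// Side note as code (not from the patch): the reordering above matters because
// a StringRef obtained from getName() borrows the value's own storage, so the
// block has to stay alive until setName() has copied the bytes. Helper name is
// illustrative.
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

static void inheritNameThenErase(BasicBlock *BB, BasicBlock *OnlyPred) {
  StringRef OldName = BB->getName();      // borrowed, not copied
  if (!OldName.empty() && !OnlyPred->hasName())
    OnlyPred->setName(OldName);           // the copy happens inside setName
  BB->eraseFromParent();                  // only now does OldName dangle
}
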
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 4aee8ff..e017f50 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -29,7 +29,7 @@
using namespace llvm;
-STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled");
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
static cl::opt<uint32_t>
LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index f66b54d..9799a30 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -346,7 +346,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
// Scan all of the uses and see if the live range is live across an unwind
// edge. If we find a use live across an invoke edge, create an alloca
// and spill the value.
- std::set<InvokeInst*> InvokesWithStoreInserted;
// Find all of the blocks that this value is live in.
std::set<BasicBlock*> LiveBBs;
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 955b853..2d2a8a5 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -66,6 +66,18 @@ namespace {
BasicBlock* OrigBlock, BasicBlock* Default);
unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
};
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
+
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
}
char LowerSwitch::ID = 0;
@@ -147,7 +159,7 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
Function::iterator FI = OrigBlock;
F->getBasicBlockList().insert(++FI, NewNode);
- ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_ULT,
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
Val, Pivot.Low, "Pivot");
NewNode->getInstList().push_back(Comp);
BranchInst::Create(LBranch, RBranch, Comp, NewNode);
@@ -222,34 +234,40 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
// Clusterify - Transform simple list of Cases into list of CaseRange's
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
-
- IntegersSubsetToBB TheClusterifier;
+ unsigned numCmps = 0;
// Start with "simple" cases
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- BasicBlock *SuccBB = i.getCaseSuccessor();
- IntegersSubset CaseRanges = i.getCaseValueEx();
- TheClusterifier.add(CaseRanges, SuccBB);
- }
-
- TheClusterifier.optimize();
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
+ i.getCaseSuccessor()));
- size_t numCmps = 0;
- for (IntegersSubsetToBB::RangeIterator i = TheClusterifier.begin(),
- e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
- IntegersSubsetToBB::Cluster &C = *i;
-
- // FIXME: Currently work with ConstantInt based numbers.
- // Changing it to APInt based is a pretty heavy for this commit.
- Cases.push_back(CaseRange(C.first.getLow().toConstantInt(),
- C.first.getHigh().toConstantInt(), C.second));
- if (C.first.isSingleNumber())
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge case into clusters
+ if (Cases.size()>=2)
+ for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
// A range counts double, since it requires two compares.
++numCmps;
}
- return numCmps;
+ return numCmps;
}
// processSwitchInst - Replace the specified switch instruction with a sequence
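
// The clustering step above, restated over plain integers as a sanity sketch
// (standalone code, not LLVM's): sort the single-value cases, then merge
// neighbours that are contiguous and branch to the same target; a range costs
// two compares when the comparison tree is emitted.
#include <algorithm>
#include <cstddef>
#include <vector>

struct Range { long Low, High; int Target; };
struct ByLow {
  bool operator()(const Range &A, const Range &B) const { return A.Low < B.Low; }
};

static unsigned clusterify(std::vector<Range> &Cases) {
  std::sort(Cases.begin(), Cases.end(), ByLow());
  for (size_t I = 0; I + 1 < Cases.size();) {
    if (Cases[I + 1].Low - Cases[I].High == 1 &&
        Cases[I].Target == Cases[I + 1].Target) {
      Cases[I].High = Cases[I + 1].High;      // grow the current range
      Cases.erase(Cases.begin() + I + 1);     // and drop the merged case
    } else {
      ++I;
    }
  }
  unsigned NumCmps = 0;
  for (size_t I = 0, E = Cases.size(); I != E; ++I)
    NumCmps += (Cases[I].Low == Cases[I].High) ? 1 : 2;
  return NumCmps;
}
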
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index ebd7db6..61b3965 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -16,7 +16,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -28,7 +27,6 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
-
PromotePass() : FunctionPass(ID) {
initializePromotePassPass(*PassRegistry::getPassRegistry());
}
@@ -64,7 +62,6 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTree>();
- const DataLayout *DL = getAnalysisIfAvailable<DataLayout>();
while (1) {
Allocas.clear();
@@ -73,12 +70,12 @@ bool PromotePass::runOnFunction(Function &F) {
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (isAllocaPromotable(AI, DL))
+ if (isAllocaPromotable(AI))
Allocas.push_back(AI);
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DL);
+ PromoteMemToReg(Allocas, DT);
NumPromoted += Allocas.size();
Changed = true;
}
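
// Sketch of the slimmed-down driver implied by the hunks above (my wording,
// not the patch): DataLayout no longer participates, so promotability testing
// and promotion both take only the alloca list and the dominator tree. The
// helper name is hypothetical; the two-argument PromoteMemToReg call assumes
// the AliasSetTracker parameter defaults to null, as the hunk above implies.
#include "llvm/Analysis/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <vector>
using namespace llvm;

static unsigned promoteEntryAllocas(BasicBlock &Entry, DominatorTree &DT) {
  std::vector<AllocaInst *> Allocas;
  // Scan the entry block (excluding its terminator) for promotable allocas.
  for (BasicBlock::iterator I = Entry.begin(), E = --Entry.end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      if (isAllocaPromotable(AI))        // no DataLayout argument any more
        Allocas.push_back(AI);
  PromoteMemToReg(Allocas, DT);
  return Allocas.size();
}
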
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 6910180..8f6eee3 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -30,7 +30,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -46,7 +45,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/InstVisitor.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -58,16 +56,56 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
-namespace {
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // FIXME: If the memory unit is of pointer or integer type, we can permit
+ // assignments to subsections of the memory unit.
+
+ // Only allow direct and non-volatile loads and stores...
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) { // Loop over all of the uses of the alloca
+ const User *U = *UI;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Note that atomic loads can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (LI->isVolatile())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI)
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ // Note that atomic stores can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (SI->isVolatile())
+ return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkers(GEPI))
+ return false;
+ } else {
+ return false;
+ }
+ }
-struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
- const DataLayout *DL;
+ return true;
+}
+
+namespace {
+struct AllocaInfo {
SmallVector<BasicBlock *, 32> DefiningBlocks;
SmallVector<BasicBlock *, 32> UsingBlocks;
- SmallVector<Instruction *, 8> DeadInsts;
- Type *AllocaTy;
StoreInst *OnlyStore;
BasicBlock *OnlyBlock;
bool OnlyUsedInOneBlock;
@@ -75,13 +113,9 @@ struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
Value *AllocaPointerVal;
DbgDeclareInst *DbgDeclare;
- AllocaInfo(const DataLayout *DL) : DL(DL) {}
-
void clear() {
DefiningBlocks.clear();
UsingBlocks.clear();
- DeadInsts.clear();
- AllocaTy = 0;
OnlyStore = 0;
OnlyBlock = 0;
OnlyUsedInOneBlock = true;
@@ -91,116 +125,39 @@ struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
/// by the rest of the pass to reason about the uses of this alloca.
- bool analyzeAlloca(AllocaInst &AI) {
+ void AnalyzeAlloca(AllocaInst *AI) {
clear();
- AllocaTy = AI.getAllocatedType();
- enqueueUsers(AI);
-
- // Walk queued up uses in the worklist to handle nested uses.
- while (!UseWorklist.empty()) {
- U = UseWorklist.pop_back_val();
- Instruction &I = *cast<Instruction>(U->getUser());
- if (!visit(I))
- return false; // Propagate failure to promote up.
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
if (OnlyUsedInOneBlock) {
if (OnlyBlock == 0)
- OnlyBlock = I.getParent();
- else if (OnlyBlock != I.getParent())
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
OnlyUsedInOneBlock = false;
}
}
- DbgDeclare = FindAllocaDbgDeclare(&AI);
- return true;
- }
-
-private:
- // Befriend the base class so it can call through private visitor methods.
- friend class InstVisitor<AllocaInfo, bool>;
-
- /// \brief A use pointer that is non-null when visiting uses.
- Use *U;
-
- /// \brief A worklist for recursively visiting all uses of an alloca.
- SmallVector<Use *, 8> UseWorklist;
-
- /// \brief A set for preventing cyclic visitation.
- SmallPtrSet<Use *, 8> VisitedUses;
-
- void enqueueUsers(Instruction &I) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
- ++UI)
- if (VisitedUses.insert(&UI.getUse()))
- UseWorklist.push_back(&UI.getUse());
+ DbgDeclare = FindAllocaDbgDeclare(AI);
}
-
- bool visitLoadInst(LoadInst &LI) {
- if (LI.isVolatile() || LI.getType() != AllocaTy)
- return false;
-
- // Keep track of variable reads.
- UsingBlocks.push_back(LI.getParent());
- AllocaPointerVal = &LI;
- return true;
- }
-
- bool visitStoreInst(StoreInst &SI) {
- if (SI.isVolatile() || SI.getValueOperand() == U->get() ||
- SI.getValueOperand()->getType() != AllocaTy)
- return false;
-
- // Remember the basic blocks which define new values for the alloca
- DefiningBlocks.push_back(SI.getParent());
- AllocaPointerVal = SI.getOperand(0);
- OnlyStore = &SI;
- return true;
- }
-
- bool visitBitCastInst(BitCastInst &BC) {
- if (BC.use_empty())
- DeadInsts.push_back(&BC);
- else
- enqueueUsers(BC);
- return true;
- }
-
- bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- if (GEPI.use_empty()) {
- DeadInsts.push_back(&GEPI);
- return true;
- }
-
- enqueueUsers(GEPI);
-
- return GEPI.hasAllZeroIndices();
- }
-
- // We can promote through debug info intrinsics as they don't alter the
- // value stored in memory.
- bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {
- DeadInsts.push_back(&I);
- return true;
- }
-
- bool visitIntrinsicInst(IntrinsicInst &II) {
- switch (II.getIntrinsicID()) {
- default:
- return false;
-
- // Lifetime intrinsics don't preclude promoting the memory to a register.
- // FIXME: We should use these to promote to undef when outside of a valid
- // lifetime.
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- DeadInsts.push_back(&II);
- return true;
- }
- }
-
- // The fallback is that the alloca cannot be promoted.
- bool visitInstruction(Instruction &I) { return false; }
};
// Data package used by RenamePass()
@@ -278,7 +235,6 @@ struct PromoteMem2Reg {
std::vector<AllocaInst *> Allocas;
DominatorTree &DT;
DIBuilder DIB;
- const DataLayout *DL;
/// An AliasSetTracker object to update. If null, don't update it.
AliasSetTracker *AST;
@@ -324,9 +280,9 @@ struct PromoteMem2Reg {
public:
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- const DataLayout *DL, AliasSetTracker *AST)
+ AliasSetTracker *AST)
: Allocas(Allocas.begin(), Allocas.end()), DT(DT),
- DIB(*DT.getRoot()->getParent()->getParent()), DL(DL), AST(AST) {}
+ DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {}
void run();
@@ -357,39 +313,27 @@ private:
} // end of anonymous namespace
-/// \brief Walk a small vector of dead instructions and recursively remove them
-/// and subsequently dead instructions.
-///
-/// This is only valid to call on dead instructions using an alloca which is
-/// promotable, as we leverage that assumption to delete them faster.
-static void removeDeadInstructions(AllocaInst *AI,
- SmallVectorImpl<Instruction *> &DeadInsts) {
- while (!DeadInsts.empty()) {
- Instruction *I = DeadInsts.pop_back_val();
-
- // Don't delete the alloca itself.
- if (I == AI)
- continue;
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
- // Note that we open code the deletion algorithm here because we know
- // apriori that all of the instructions using an alloca that reaches here
- // are trivially dead when their use list becomes empty (The only risk are
- // lifetime markers which we specifically want to nuke). By coding it here
- // we can skip the triviality test and be more efficient.
- //
- // Null out all of the instruction's operands to see if any operand becomes
- // dead as we go.
- for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
- ++OI) {
- Instruction *Op = dyn_cast<Instruction>(*OI);
- if (!Op)
- continue;
-
- OI->set(0);
- if (!Op->use_empty())
- continue;
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
- DeadInsts.push_back(Op);
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+ // Follow the use/def chain to erase them now instead of leaving it for
+ // dead code elimination later.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE;) {
+ Instruction *Inst = cast<Instruction>(*UI);
+ ++UI;
+ Inst->eraseFromParent();
+ }
}
I->eraseFromParent();
}
@@ -474,6 +418,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
DIBuilder DIB(*AI->getParent()->getParent()->getParent());
ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
DDI->eraseFromParent();
+ LBI.deleteValue(DDI);
}
// Remove the (now dead) store and alloca.
Info.OnlyStore->eraseFromParent();
@@ -486,16 +431,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
return true;
}
-namespace {
-/// This is a helper predicate used to search by the first element of a pair.
-struct StoreIndexSearchPredicate {
- bool operator()(const std::pair<unsigned, StoreInst *> &LHS,
- const std::pair<unsigned, StoreInst *> &RHS) {
- return LHS.first < RHS.first;
- }
-};
-}
-
/// Many allocas are only used within a single basic block. If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
@@ -528,8 +463,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Sort the stores by their index, making it efficient to do a lookup with a
// binary search.
- std::sort(StoresByIndex.begin(), StoresByIndex.end(),
- StoreIndexSearchPredicate());
+ std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
@@ -544,7 +478,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
StoresByIndexTy::iterator I =
std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
- StoreIndexSearchPredicate());
+ less_first());
if (I == StoresByIndex.begin())
// If there is no store before this load, the load takes the undef value.
@@ -577,8 +511,10 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LBI.deleteValue(AI);
// The alloca's debuginfo can be removed as well.
- if (DbgDeclareInst *DDI = Info.DbgDeclare)
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
DDI->eraseFromParent();
+ LBI.deleteValue(DDI);
+ }
++NumLocalPromoted;
}
@@ -590,23 +526,17 @@ void PromoteMem2Reg::run() {
PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
- AllocaInfo Info(DL);
+ AllocaInfo Info;
LargeBlockInfo LBI;
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
+ assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
- // Calculate the set of read and write-locations for each alloca. This is
- // analogous to finding the 'uses' and 'definitions' of each variable.
- bool Good = Info.analyzeAlloca(*AI);
- (void)Good;
- assert(Good && "Cannot promote non-promotable alloca!");
-
- // Nuke all of the dead instructions.
- removeDeadInstructions(AI, Info.DeadInsts);
+ removeLifetimeIntrinsicUsers(AI);
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
@@ -620,6 +550,10 @@ void PromoteMem2Reg::run() {
continue;
}
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
@@ -904,16 +838,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
}
}
-namespace {
-typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
-
-struct DomTreeNodeCompare {
- bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
- return LHS.second < RHS.second;
- }
-};
-} // end anonymous namespace
-
/// At this point, we're committed to promoting the alloca using IDF's, and the
/// standard SSA construction algorithm. Determine which blocks need phi nodes
/// and see if we can optimize out some work by avoiding insertion of dead phi
@@ -931,9 +855,9 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
- typedef std::priority_queue<DomTreeNodePair,
- SmallVector<DomTreeNodePair, 32>,
- DomTreeNodeCompare> IDFPriorityQueue;
+ typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ less_second> IDFPriorityQueue;
IDFPriorityQueue PQ;
for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
@@ -1145,19 +1069,11 @@ NextIteration:
goto NextIteration;
}
-bool llvm::isAllocaPromotable(const AllocaInst *AI, const DataLayout *DL) {
- // We cast away constness because we re-use the non-const analysis that the
- // actual promotion routine uses. While it is non-const, it doesn't actually
- // mutate anything at this phase, and we discard the non-const results that
- // promotion uses to mutate the alloca.
- return AllocaInfo(DL).analyzeAlloca(*const_cast<AllocaInst *>(AI));
-}
-
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- const DataLayout *DL, AliasSetTracker *AST) {
+ AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty())
return;
- PromoteMem2Reg(Allocas, DT, DL, AST).run();
+ PromoteMem2Reg(Allocas, DT, AST).run();
}
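
// Aside (not part of the change): less_first and less_second are the small
// generic comparators in llvm/ADT/STLExtras.h that replace the hand-rolled
// StoreIndexSearchPredicate and DomTreeNodeCompare structs deleted above.
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <utility>
#include <vector>

static void sortByIndex(std::vector<std::pair<unsigned, int> > &V) {
  // Orders the pairs by .first only, exactly what the old predicate did.
  std::sort(V.begin(), V.end(), llvm::less_first());
}
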
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index fc85ef3..30adbfa 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -63,7 +63,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
}
static bool IsEquivalentPHI(PHINode *PHI,
- DenseMap<BasicBlock*, Value*> &ValueMapping) {
+ SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) {
unsigned PHINumValues = PHI->getNumIncomingValues();
if (PHINumValues != ValueMapping.size())
return false;
@@ -136,8 +136,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// Otherwise, we do need a PHI: check to see if we already have one available
// in this block that produces the right value.
if (isa<PHINode>(BB->begin())) {
- DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
- PredValues.end());
+ SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
+ PredValues.end());
PHINode *SomePHI;
for (BasicBlock::iterator It = BB->begin();
(SomePHI = dyn_cast<PHINode>(It)); ++It) {
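
// Aside (not from the patch): SmallDenseMap keeps a small number of buckets
// inline, so the common GetValueInMiddleOfBlock case, a handful of
// predecessors, now avoids a heap allocation entirely. Standalone example.
#include "llvm/ADT/DenseMap.h"

static unsigned countDistinct(const int *Keys, unsigned N) {
  llvm::SmallDenseMap<int, unsigned, 8> Seen; // 8 buckets of inline storage
  for (unsigned I = 0; I != N; ++I)
    ++Seen[Keys[I]];                          // spills to the heap only if needed
  return Seen.size();
}
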
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c4c1423..ff50b12 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -475,9 +476,13 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
- if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
- if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
- CV = PTII->getOperand(0);
+ if (TD && CV) {
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
+ Value *Ptr = PTII->getPointerOperand();
+ if (PTII->getType() == TD->getIntPtrType(Ptr->getType()))
+ CV = Ptr;
+ }
+ }
return CV;
}
@@ -699,9 +704,10 @@ namespace {
};
}
-static int ConstantIntSortPredicate(const void *P1, const void *P2) {
- const ConstantInt *LHS = *(const ConstantInt*const*)P1;
- const ConstantInt *RHS = *(const ConstantInt*const*)P2;
+static int ConstantIntSortPredicate(ConstantInt *const *P1,
+ ConstantInt *const *P2) {
+ const ConstantInt *LHS = *P1;
+ const ConstantInt *RHS = *P2;
if (LHS->getValue().ult(RHS->getValue()))
return 1;
if (LHS->getValue() == RHS->getValue())
@@ -924,7 +930,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
"magicptr");
}
@@ -1556,6 +1562,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return true;
}
+/// \returns True if this block contains a CallInst with the NoDuplicate
+/// attribute.
+static bool HasNoDuplicateCall(const BasicBlock *BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ continue;
+ if (CI->cannotDuplicate())
+ return true;
+ }
+ return false;
+}
+
/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
/// across this block.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
@@ -1603,6 +1622,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
// Now we know that this block has multiple preds and two succs.
if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+ if (HasNoDuplicateCall(BB)) return false;
+
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -2069,14 +2090,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ensure that any values used in the bonus instruction are also used
// by the terminator of the predecessor. This means that those values
// must already have been resolved, so we won't be inhibiting the
- // out-of-order core by speculating them earlier.
- if (BonusInst) {
+ // out-of-order core by speculating them earlier. We also allow
+ // instructions that are used by the terminator's condition because it
+ // exposes more merging opportunities.
+ bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() &&
+ *BonusInst->use_begin() == Cond);
+
+ if (BonusInst && !UsedByBranch) {
// Collect the values used by the bonus inst
SmallPtrSet<Value*, 4> UsedValues;
for (Instruction::op_iterator OI = BonusInst->op_begin(),
OE = BonusInst->op_end(); OI != OE; ++OI) {
Value *V = *OI;
- if (!isa<Constant>(V))
+ if (!isa<Constant>(V) && !isa<Argument>(V))
UsedValues.insert(V);
}
@@ -2787,7 +2813,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
if (CompVal->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
- TD->getIntPtrType(CompVal->getContext()),
+ TD->getIntPtrType(CompVal->getType()),
"magicptr");
}
@@ -3160,7 +3186,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// and use it to remove dead cases.
static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Value *Cond = SI->getCondition();
- unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
+ unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
ComputeMaskedBits(Cond, KnownZero, KnownOne);
@@ -3303,28 +3329,10 @@ static Constant *LookupConstant(Value *V,
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
-static Constant *ConstantFold(Instruction *I,
- const SmallDenseMap<Value*, Constant*>& ConstantPool) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- Constant *A = LookupConstant(BO->getOperand(0), ConstantPool);
- if (!A)
- return 0;
- Constant *B = LookupConstant(BO->getOperand(1), ConstantPool);
- if (!B)
- return 0;
- return ConstantExpr::get(BO->getOpcode(), A, B);
- }
-
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
- Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
- if (!A)
- return 0;
- Constant *B = LookupConstant(I->getOperand(1), ConstantPool);
- if (!B)
- return 0;
- return ConstantExpr::getCompare(Cmp->getPredicate(), A, B);
- }
-
+static Constant *
+ConstantFold(Instruction *I,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool,
+ const DataLayout *DL) {
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
if (!A)
@@ -3336,14 +3344,19 @@ static Constant *ConstantFold(Instruction *I,
return 0;
}
- if (CastInst *Cast = dyn_cast<CastInst>(I)) {
- Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
- if (!A)
+ SmallVector<Constant *, 4> COps;
+ for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
+ if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
+ COps.push_back(A);
+ else
return 0;
- return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy());
}
- return 0;
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
+ COps[1], DL);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
}
/// GetCaseResults - Try to determine the resulting constant values in phi nodes
@@ -3355,7 +3368,8 @@ GetCaseResults(SwitchInst *SI,
ConstantInt *CaseVal,
BasicBlock *CaseDest,
BasicBlock **CommonDest,
- SmallVectorImpl<std::pair<PHINode*,Constant*> > &Res) {
+ SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res,
+ const DataLayout *DL) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
@@ -3374,7 +3388,7 @@ GetCaseResults(SwitchInst *SI,
} else if (isa<DbgInfoIntrinsic>(I)) {
// Skip debug intrinsic.
continue;
- } else if (Constant *C = ConstantFold(I, ConstantPool)) {
+ } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) {
// Instruction is side-effect free and constant.
ConstantPool.insert(std::make_pair(I, C));
} else {
@@ -3698,7 +3712,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
ResultsTy Results;
if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
- Results))
+ Results, TD))
return false;
// Append the result from this case to the list for each phi.
@@ -3712,7 +3726,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// Get the resulting values for the default case.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList))
+ DefaultResultsList, TD))
return false;
for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
PHINode *PHI = DefaultResultsList[I].first;
@@ -3733,14 +3747,32 @@ static bool SwitchToLookupTable(SwitchInst *SI,
CommonDest->getParent(),
CommonDest);
- // Check whether the condition value is within the case range, and branch to
- // the new BB.
+ // Compute the table index value.
Builder.SetInsertPoint(SI);
Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
"switch.tableidx");
- Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
- MinCaseVal->getType(), TableSize));
- Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Compute the maximum table size representable by the integer type we are
+ // switching upon.
+ unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize;
+ assert(MaxTableSize >= TableSize &&
+ "It is impossible for a switch to have more entries than the max "
+ "representable value of its input integer type's size.");
+
+ // If we have a fully covered lookup table, unconditionally branch to the
+ // lookup table BB. Otherwise, check if the condition value is within the case
+ // range. If it is so, branch to the new BB. Otherwise branch to SI's default
+ // destination.
+ const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize;
+ if (GeneratingCoveredLookupTable) {
+ Builder.CreateBr(LookupBB);
+ SI->getDefaultDest()->removePredecessor(SI->getParent());
+ } else {
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ }
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
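
A standalone restatement of the covered-table test introduced above (plain C++, not the pass's code): an N-bit condition can take at most 2^N distinct values, so a table with exactly that many entries needs no bounds check and can be branched to unconditionally.

    #include <cstdint>

    // Returns true when every possible value of an N-bit switch condition has
    // a table entry, i.e. the range check before the lookup is redundant.
    bool isCoveredLookupTable(unsigned CaseSizeInBits, uint64_t TableSize) {
      uint64_t MaxTableSize =
          CaseSizeInBits > 63 ? UINT64_MAX : (uint64_t(1) << CaseSizeInBits);
      return MaxTableSize == TableSize;
    }
    // Example: an i2 condition with a 4-entry table is covered, so
    // isCoveredLookupTable(2, 4) == true and isCoveredLookupTable(2, 3) == false.
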
@@ -3769,9 +3801,11 @@ static bool SwitchToLookupTable(SwitchInst *SI,
Builder.CreateBr(CommonDest);
// Remove the switch.
- for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
- if (Succ == SI->getDefaultDest()) continue;
+
+ if (Succ == SI->getDefaultDest())
+ continue;
Succ->removePredecessor(SI->getParent());
}
SI->eraseFromParent();
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 094c201..15b3e66 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -26,11 +27,16 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
+static cl::opt<bool>
+ColdErrorCalls("error-reporting-is-cold", cl::init(true),
+ cl::Hidden, cl::desc("Treat error-reporting calls as cold"));
+
/// This class is the abstract base class for the set of optimizations that
/// corresponds to one library call.
namespace {
@@ -118,6 +124,21 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
return false;
}
+/// \brief Check whether the overloaded unary floating point function
+/// corresponding to \a Ty is available.
+static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
+ LibFunc::Func LongDoubleFn) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ return TLI->has(FloatFn);
+ case Type::DoubleTyID:
+ return TLI->has(DoubleFn);
+ default:
+ return TLI->has(LongDoubleFn);
+ }
+}
+
//===----------------------------------------------------------------------===//
// Fortified Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -477,7 +498,7 @@ struct StrChrOpt : public LibCallOptimization {
// Compute the offset, make sure to handle the case when we're searching for
// zero (a weird way to spell strlen).
- size_t I = CharC->getSExtValue() == 0 ?
+ size_t I = (0xFF & CharC->getSExtValue()) == 0 ?
Str.size() : Str.find(CharC->getSExtValue());
if (I == StringRef::npos) // Didn't find the char. strchr returns null.
return Constant::getNullValue(CI->getType());
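
The masking change above matters because strchr and strrchr convert their int argument to char before searching, so a constant such as 0x100 really searches for '\0'. A small standalone illustration of the corrected test (plain C++, not the simplifier's code):

    #include <cstdint>

    // strchr(s, C) searches for (char)C, so only the low byte decides whether
    // the call is really a disguised strlen (search for the terminating NUL).
    bool searchesForNul(int64_t SignExtendedChar) {
      return (0xFF & SignExtendedChar) == 0;
    }
    // searchesForNul(0) == true, searchesForNul(0x100) == true (256 -> '\0'),
    // searchesForNul('a') == false.
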
@@ -513,7 +534,7 @@ struct StrRChrOpt : public LibCallOptimization {
}
// Compute the offset.
- size_t I = CharC->getSExtValue() == 0 ?
+ size_t I = (0xFF & CharC->getSExtValue()) == 0 ?
Str.size() : Str.rfind(CharC->getSExtValue());
if (I == StringRef::npos) // Didn't find the char. Return null.
return Constant::getNullValue(CI->getType());
@@ -774,7 +795,7 @@ struct StrPBrkOpt : public LibCallOptimization {
// Constant folding.
if (HasS1 && HasS2) {
size_t I = S1.find_first_of(S2);
- if (I == std::string::npos) // No match.
+ if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
@@ -912,7 +933,7 @@ struct StrStrOpt : public LibCallOptimization {
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
- std::string::size_type Offset = SearchStr.find(ToFindStr);
+ size_t Offset = SearchStr.find(ToFindStr);
if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
return Constant::getNullValue(CI->getType());
@@ -1031,7 +1052,7 @@ struct MemSetOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -1133,9 +1154,13 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ // pow(1.0, x) -> 1.0
+ if (Op1C->isExactlyValue(1.0))
return Op1C;
- if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ // pow(2.0, x) -> exp2(x)
+ if (Op1C->isExactlyValue(2.0) &&
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
+ LibFunc::exp2l))
return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
}
@@ -1145,7 +1170,11 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
- if (Op2C->isExactlyValue(0.5)) {
+ if (Op2C->isExactlyValue(0.5) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
+ LibFunc::sqrtl) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
+ LibFunc::fabsl)) {
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
// and negative infinity correctly.
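
For reference, a standalone C++ model (not the simplifier's emitted IR) of what the expansion described in the comment above computes, and why a plain sqrt call is not enough:

    #include <cmath>
    #include <limits>

    // pow(-inf, 0.5) is +inf but sqrt(-inf) is NaN, so -inf is special-cased;
    // fabs() also turns sqrt(-0.0) == -0.0 into the +0.0 that pow returns.
    double powHalf(double X) {
      const double Inf = std::numeric_limits<double>::infinity();
      return X == -Inf ? Inf : std::fabs(std::sqrt(X));
    }
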
@@ -1178,7 +1207,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "exp2" &&
- TLI->has(LibFunc::exp2)) {
+ TLI->has(LibFunc::exp2f)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
}
@@ -1229,6 +1258,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
}
};
+struct SinCosPiOpt : public LibCallOptimization {
+ SinCosPiOpt() {}
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Make sure the prototype is as expected, otherwise the rest of the
+ // function is probably invalid and likely to abort.
+ if (!isTrigLibCall(CI))
+ return 0;
+
+ Value *Arg = CI->getArgOperand(0);
+ SmallVector<CallInst *, 1> SinCalls;
+ SmallVector<CallInst *, 1> CosCalls;
+ SmallVector<CallInst *, 1> SinCosCalls;
+
+ bool IsFloat = Arg->getType()->isFloatTy();
+
+ // Look for all compatible sinpi, cospi and sincospi calls with the same
+ // argument. If there are enough (in some sense) we can make the
+ // substitution.
+ for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI)
+ classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls,
+ SinCosCalls);
+
+ // It's only worthwhile if both sinpi and cospi are actually used.
+ if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
+ return 0;
+
+ Value *Sin, *Cos, *SinCos;
+ insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
+ SinCos);
+
+ replaceTrigInsts(SinCalls, Sin);
+ replaceTrigInsts(CosCalls, Cos);
+ replaceTrigInsts(SinCosCalls, SinCos);
+
+ return 0;
+ }
+
+ bool isTrigLibCall(CallInst *CI) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+
+ // We can only hope to do anything useful if we can ignore things like errno
+ // and floating-point exceptions.
+ bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) &&
+ CI->hasFnAttr(Attribute::ReadNone);
+
+ // Other than that we need float(float) or double(double)
+ return AttributesSafe && FT->getNumParams() == 1 &&
+ FT->getReturnType() == FT->getParamType(0) &&
+ (FT->getParamType(0)->isFloatTy() ||
+ FT->getParamType(0)->isDoubleTy());
+ }
+
+ void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
+ SmallVectorImpl<CallInst *> &SinCalls,
+ SmallVectorImpl<CallInst *> &CosCalls,
+ SmallVectorImpl<CallInst *> &SinCosCalls) {
+ CallInst *CI = dyn_cast<CallInst>(Val);
+
+ if (!CI)
+ return;
+
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+ LibFunc::Func Func;
+ if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) ||
+ !isTrigLibCall(CI))
+ return;
+
+ if (IsFloat) {
+ if (Func == LibFunc::sinpif)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc::cospif)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc::sincospi_stretf)
+ SinCosCalls.push_back(CI);
+ } else {
+ if (Func == LibFunc::sinpi)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc::cospi)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc::sincospi_stret)
+ SinCosCalls.push_back(CI);
+ }
+ }
+
+ void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) {
+ for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(),
+ E = Calls.end();
+ I != E; ++I) {
+ LCS->replaceAllUsesWith(*I, Res);
+ }
+ }
+
+ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
+ bool UseFloat, Value *&Sin, Value *&Cos,
+ Value *&SinCos) {
+ Type *ArgTy = Arg->getType();
+ Type *ResTy;
+ StringRef Name;
+
+ Triple T(OrigCallee->getParent()->getTargetTriple());
+ if (UseFloat) {
+ Name = "__sincospi_stretf";
+
+ assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
+ // x86_64 can't use {float, float} since that would be returned in both
+ // xmm0 and xmm1, which isn't what a real struct would do.
+ ResTy = T.getArch() == Triple::x86_64
+ ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+ : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL));
+ } else {
+ Name = "__sincospi_stret";
+ ResTy = StructType::get(ArgTy, ArgTy, NULL);
+ }
+
+ Module *M = OrigCallee->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
+ ResTy, ArgTy, NULL);
+
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+ // If the argument is an instruction, it must dominate all uses so put our
+ // sincos call there.
+ BasicBlock::iterator Loc = ArgInst;
+ B.SetInsertPoint(ArgInst->getParent(), ++Loc);
+ } else {
+ // Otherwise (e.g. for a constant) the beginning of the function is as
+ // good a place as any.
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
+ B.SetInsertPoint(&EntryBB, EntryBB.begin());
+ }
+
+ SinCos = B.CreateCall(Callee, Arg, "sincospi");
+
+ if (SinCos->getType()->isStructTy()) {
+ Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
+ Cos = B.CreateExtractValue(SinCos, 1, "cospi");
+ } else {
+ Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
+ "sinpi");
+ Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
+ "cospi");
+ }
+ }
+
+};
+
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
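
The early exit in SinCosPiOpt::callOptimizer above boils down to a simple profitability rule: only emit the combined sincospi call when it will actually replace both kinds of use. Restated as standalone C++ (names are illustrative, not the pass's):

    // Combining is worthwhile if there is already a sincospi-style call to
    // fold into, or if both a sinpi-style and a cospi-style use of the same
    // argument are present.
    bool worthCombining(bool HaveSinCalls, bool HaveCosCalls,
                        bool HaveSinCosCalls) {
      return HaveSinCosCalls || (HaveSinCalls && HaveCosCalls);
    }
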
@@ -1333,6 +1511,54 @@ struct ToAsciiOpt : public LibCallOptimization {
// Formatting and IO Library Call Optimizations
//===----------------------------------------------------------------------===//
+struct ErrorReportingOpt : public LibCallOptimization {
+ ErrorReportingOpt(int S = -1) : StreamArg(S) {}
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) {
+ // Error reporting calls should be cold, mark them as such.
+ // This applies even to non-builtin calls: it is only a hint and applies to
+ // functions that the frontend might not understand as builtins.
+
+ // This heuristic was suggested in:
+ // Improving Static Branch Prediction in a Compiler
+ // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
+ // Proceedings of PACT'98, Oct. 1998, IEEE
+
+ if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI)) {
+ CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
+ }
+
+ return 0;
+ }
+
+protected:
+ bool isReportingError(Function *Callee, CallInst *CI) {
+ if (!ColdErrorCalls)
+ return false;
+
+ if (!Callee || !Callee->isDeclaration())
+ return false;
+
+ if (StreamArg < 0)
+ return true;
+
+ // These functions might be considered cold, but only if their stream
+ // argument is stderr.
+
+ if (StreamArg >= (int) CI->getNumArgOperands())
+ return false;
+ LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
+ if (!LI)
+ return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+ if (!GV || !GV->isDeclaration())
+ return false;
+ return GV->getName() == "stderr";
+ }
+
+ int StreamArg;
+};
+
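
A source-level sketch of the pattern ErrorReportingOpt targets (illustrative only): stream-taking variants are marked cold only when their FILE* argument is a load of the global "stderr", which is the usual shape of an error path.

    #include <cstdio>

    // The fprintf call below sits on an error path and writes to stderr, so
    // marking the call site cold steers block placement and inlining away
    // from it without changing behaviour.
    int checkedDiv(int A, int B) {
      if (B == 0) {
        std::fprintf(stderr, "division by zero\n");  // becomes a cold call site
        return 0;
      }
      return A / B;
    }
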
struct PrintFOpt : public LibCallOptimization {
Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
@@ -1361,7 +1587,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("foo\n") --> puts("foo")
if (FormatStr[FormatStr.size()-1] == '\n' &&
- FormatStr.find('%') == std::string::npos) { // no format characters.
+ FormatStr.find('%') == StringRef::npos) { // No format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
@@ -1513,6 +1739,9 @@ struct SPrintFOpt : public LibCallOptimization {
struct FPrintFOpt : public LibCallOptimization {
Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 0);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// All the optimizations depend on the format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1590,6 +1819,9 @@ struct FPrintFOpt : public LibCallOptimization {
struct FWriteOpt : public LibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 3);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// Require a pointer, an integer, an integer, a pointer, returning integer.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
@@ -1623,6 +1855,9 @@ struct FWriteOpt : public LibCallOptimization {
struct FPutsOpt : public LibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 1);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// These optimizations require DataLayout.
if (!TD) return 0;
@@ -1741,6 +1976,7 @@ static MemSetOpt MemSet;
// Math library call optimizations.
static UnaryDoubleFPOpt UnaryDoubleFP(false);
static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+static SinCosPiOpt SinCosPi;
// Integer library call optimizations.
static FFSOpt FFS;
@@ -1750,6 +1986,9 @@ static IsAsciiOpt IsAscii;
static ToAsciiOpt ToAscii;
// Formatting and IO library call optimizations.
+static ErrorReportingOpt ErrorReporting;
+static ErrorReportingOpt ErrorReporting0(0);
+static ErrorReportingOpt ErrorReporting1(1);
static PrintFOpt PrintF;
static SPrintFOpt SPrintF;
static FPrintFOpt FPrintF;
@@ -1825,6 +2064,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case LibFunc::cos:
case LibFunc::cosl:
return &Cos;
+ case LibFunc::sinpif:
+ case LibFunc::sinpi:
+ case LibFunc::cospif:
+ case LibFunc::cospi:
+ return &SinCosPi;
case LibFunc::powf:
case LibFunc::pow:
case LibFunc::powl:
@@ -1859,6 +2103,13 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
return &FPuts;
case LibFunc::puts:
return &Puts;
+ case LibFunc::perror:
+ return &ErrorReporting;
+ case LibFunc::vfprintf:
+ case LibFunc::fiprintf:
+ return &ErrorReporting0;
+ case LibFunc::fputc:
+ return &ErrorReporting1;
case LibFunc::ceil:
case LibFunc::fabs:
case LibFunc::floor:
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
index b98cb5b..2ef692c 100644
--- a/lib/Transforms/Utils/SpecialCaseList.cpp
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -49,29 +49,45 @@ struct SpecialCaseList::Entry {
}
};
-SpecialCaseList::SpecialCaseList(const StringRef Path) {
- // Validate and open blacklist file.
- if (Path.empty()) return;
+SpecialCaseList::SpecialCaseList() : Entries() {}
+
+SpecialCaseList *SpecialCaseList::create(
+ const StringRef Path, std::string &Error) {
+ if (Path.empty())
+ return new SpecialCaseList();
OwningPtr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(Path, File)) {
- report_fatal_error("Can't open blacklist file: " + Path + ": " +
- EC.message());
+ Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
+ return 0;
}
+ return create(File.get(), Error);
+}
- init(File.get());
+SpecialCaseList *SpecialCaseList::create(
+ const MemoryBuffer *MB, std::string &Error) {
+ OwningPtr<SpecialCaseList> SCL(new SpecialCaseList());
+ if (!SCL->parse(MB, Error))
+ return 0;
+ return SCL.take();
}
-SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) {
- init(MB);
+SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
+ std::string Error;
+ if (SpecialCaseList *SCL = create(Path, Error))
+ return SCL;
+ report_fatal_error(Error);
}
-void SpecialCaseList::init(const MemoryBuffer *MB) {
+bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
// Iterate through each line in the blacklist file.
SmallVector<StringRef, 16> Lines;
SplitString(MB->getBuffer(), Lines, "\n\r");
StringMap<StringMap<std::string> > Regexps;
+ assert(Entries.empty() &&
+ "parse() should be called on an empty SpecialCaseList");
+ int LineNo = 1;
for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
- I != E; ++I) {
+ I != E; ++I, ++LineNo) {
// Ignore empty lines and lines starting with "#"
if (I->empty() || I->startswith("#"))
continue;
@@ -80,7 +96,9 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
StringRef Prefix = SplitLine.first;
if (SplitLine.second.empty()) {
// Missing ':' in the line.
- report_fatal_error("malformed blacklist line: " + SplitLine.first);
+ Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" +
+ SplitLine.first + "'").str();
+ return false;
}
std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
@@ -113,10 +131,11 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
// Check that the regexp is valid.
Regex CheckRE(Regexp);
- std::string Error;
- if (!CheckRE.isValid(Error)) {
- report_fatal_error("malformed blacklist regex: " + SplitLine.second +
- ": " + Error);
+ std::string REError;
+ if (!CheckRE.isValid(REError)) {
+ Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" +
+ SplitLine.second + "': " + REError).str();
+ return false;
}
// Add this regexp into the proper group by its prefix.
@@ -135,6 +154,7 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
}
}
+ return true;
}
SpecialCaseList::~SpecialCaseList() {
@@ -149,18 +169,12 @@ SpecialCaseList::~SpecialCaseList() {
}
}
-bool SpecialCaseList::findCategory(const Function &F,
- StringRef &Category) const {
- return findCategory(*F.getParent(), Category) ||
- findCategory("fun", F.getName(), Category);
-}
-
bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
return isIn(*F.getParent(), Category) ||
inSectionCategory("fun", F.getName(), Category);
}
-static StringRef GetGVTypeString(const GlobalVariable &G) {
+static StringRef GetGlobalTypeString(const GlobalValue &G) {
// Types of GlobalVariables are always pointer types.
Type *GType = G.getType()->getElementType();
// For now we support blacklisting struct types only.
@@ -171,46 +185,29 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
return "<unknown type>";
}
-bool SpecialCaseList::findCategory(const GlobalVariable &G,
- StringRef &Category) const {
- return findCategory(*G.getParent(), Category) ||
- findCategory("global", G.getName(), Category) ||
- findCategory("type", GetGVTypeString(G), Category);
-}
-
bool SpecialCaseList::isIn(const GlobalVariable &G,
const StringRef Category) const {
return isIn(*G.getParent(), Category) ||
inSectionCategory("global", G.getName(), Category) ||
- inSectionCategory("type", GetGVTypeString(G), Category);
+ inSectionCategory("type", GetGlobalTypeString(G), Category);
}
-bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const {
- return findCategory("src", M.getModuleIdentifier(), Category);
+bool SpecialCaseList::isIn(const GlobalAlias &GA,
+ const StringRef Category) const {
+ if (isIn(*GA.getParent(), Category))
+ return true;
+
+ if (isa<FunctionType>(GA.getType()->getElementType()))
+ return inSectionCategory("fun", GA.getName(), Category);
+
+ return inSectionCategory("global", GA.getName(), Category) ||
+ inSectionCategory("type", GetGlobalTypeString(GA), Category);
}
bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
return inSectionCategory("src", M.getModuleIdentifier(), Category);
}
-bool SpecialCaseList::findCategory(const StringRef Section,
- const StringRef Query,
- StringRef &Category) const {
- StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
- if (I == Entries.end()) return false;
-
- for (StringMap<Entry>::const_iterator II = I->second.begin(),
- IE = I->second.end();
- II != IE; ++II) {
- if (II->getValue().match(Query)) {
- Category = II->first();
- return true;
- }
- }
-
- return false;
-}
-
bool SpecialCaseList::inSectionCategory(const StringRef Section,
const StringRef Query,
const StringRef Category) const {
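
The SpecialCaseList rework above replaces constructors that called report_fatal_error with a create()/createOrDie() pair, so library users can surface parse errors themselves. A minimal standalone sketch of that factory-with-error-string pattern (simplified types, not the class's real parsing logic):

    #include <cstdlib>
    #include <string>

    struct Blacklist {
      // Mirrors the create()/createOrDie() split above: create reports
      // failures through Error, createOrDie keeps the old aborting behaviour.
      static Blacklist *create(bool FileReadable, std::string &Error) {
        if (!FileReadable) {
          Error = "Can't open file";
          return 0;
        }
        return new Blacklist();
      }
      static Blacklist *createOrDie(bool FileReadable) {
        std::string Error;
        if (Blacklist *List = create(FileReadable, Error))
          return List;
        std::abort();   // the real code calls report_fatal_error(Error)
      }
    };
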
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index cbc1d63..c5e1dcb 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -533,7 +533,7 @@ namespace {
default: break;
case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because scalar GEPs are usually
- // lowered to the intruction addressing mode. At the moment we don't
+ // lowered to the instruction addressing mode. At the moment we don't
// generate vector GEPs.
return 0;
case Instruction::Br:
@@ -625,10 +625,10 @@ namespace {
ConstantInt *IntOff = ConstOffSCEV->getValue();
int64_t Offset = IntOff->getSExtValue();
- Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
+ Type *VTy = IPtr->getType()->getPointerElementType();
int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
- Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
@@ -1182,6 +1182,8 @@ namespace {
// Look for an instruction with which to pair instruction *I...
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
bool JAfterStart = IAfterStart;
BasicBlock::iterator J = llvm::next(I);
for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
@@ -1403,6 +1405,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) {
(void) trackUsesOfI(Users, WriteSet, I, J);
@@ -2227,11 +2231,12 @@ namespace {
// The pointer value is taken to be the one with the lowest offset.
Value *VPtr = IPtr;
- Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
- Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *ArgTypeI = IPtr->getType()->getPointerElementType();
+ Type *ArgTypeJ = JPtr->getType()->getPointerElementType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- Type *VArgPtrType = PointerType::get(VArgType,
- cast<PointerType>(IPtr->getType())->getAddressSpace());
+ Type *VArgPtrType
+ = PointerType::get(VArgType,
+ IPtr->getType()->getPointerAddressSpace());
return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
/* insert before */ I);
}
@@ -2240,7 +2245,7 @@ namespace {
unsigned MaskOffset, unsigned NumInElem,
unsigned NumInElem1, unsigned IdxOffset,
std::vector<Constant*> &Mask) {
- unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements();
+ unsigned NumElem1 = J->getType()->getVectorNumElements();
for (unsigned v = 0; v < NumElem1; ++v) {
int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
if (m < 0) {
@@ -2267,18 +2272,18 @@ namespace {
Type *ArgTypeJ = J->getType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements();
+ unsigned NumElemI = ArgTypeI->getVectorNumElements();
// Get the total number of elements in the fused vector type.
// By definition, this must equal the number of elements in
// the final mask.
- unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
+ unsigned NumElem = VArgType->getVectorNumElements();
std::vector<Constant*> Mask(NumElem);
Type *OpTypeI = I->getOperand(0)->getType();
- unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements();
+ unsigned NumInElemI = OpTypeI->getVectorNumElements();
Type *OpTypeJ = J->getOperand(0)->getType();
- unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements();
+ unsigned NumInElemJ = OpTypeJ->getVectorNumElements();
// The fused vector will be:
// -----------------------------------------------------
@@ -2340,6 +2345,12 @@ namespace {
return ExpandedIEChain;
}
+ static unsigned getNumScalarElements(Type *Ty) {
+ if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+ return VecTy->getNumElements();
+ return 1;
+ }
+
// Returns the value to be used as the specified operand of the vector
// instruction that fuses I with J.
Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
@@ -2355,17 +2366,8 @@ namespace {
Instruction *L = I, *H = J;
Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
- unsigned numElemL;
- if (ArgTypeL->isVectorTy())
- numElemL = cast<VectorType>(ArgTypeL)->getNumElements();
- else
- numElemL = 1;
-
- unsigned numElemH;
- if (ArgTypeH->isVectorTy())
- numElemH = cast<VectorType>(ArgTypeH)->getNumElements();
- else
- numElemH = 1;
+ unsigned numElemL = getNumScalarElements(ArgTypeL);
+ unsigned numElemH = getNumScalarElements(ArgTypeH);
Value *LOp = L->getOperand(o);
Value *HOp = H->getOperand(o);
@@ -2426,11 +2428,12 @@ namespace {
if (CanUseInputs) {
unsigned LOpElem =
- cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType())
- ->getNumElements();
+ cast<Instruction>(LOp)->getOperand(0)->getType()
+ ->getVectorNumElements();
+
unsigned HOpElem =
- cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType())
- ->getNumElements();
+ cast<Instruction>(HOp)->getOperand(0)->getType()
+ ->getVectorNumElements();
// We have one or two input vectors. We need to map each index of the
// operands to the index of the original vector.
@@ -2646,14 +2649,14 @@ namespace {
getReplacementName(IBeforeJ ? I : J,
true, o, 1));
}
-
+
NHOp->insertBefore(IBeforeJ ? J : I);
HOp = NHOp;
}
}
if (ArgType->isVectorTy()) {
- unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
+ unsigned numElem = VArgType->getVectorNumElements();
std::vector<Constant*> Mask(numElem);
for (unsigned v = 0; v < numElem; ++v) {
unsigned Idx = v;
@@ -2746,16 +2749,8 @@ namespace {
VectorType *VType = getVecTypeForPair(IType, JType);
unsigned numElem = VType->getNumElements();
- unsigned numElemI, numElemJ;
- if (IType->isVectorTy())
- numElemI = cast<VectorType>(IType)->getNumElements();
- else
- numElemI = 1;
-
- if (JType->isVectorTy())
- numElemJ = cast<VectorType>(JType)->getNumElements();
- else
- numElemJ = 1;
+ unsigned numElemI = getNumScalarElements(IType);
+ unsigned numElemJ = getNumScalarElements(JType);
if (IType->isVectorTy()) {
std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
@@ -2804,6 +2799,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (; cast<Instruction>(L) != J; ++L)
(void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
@@ -2824,6 +2821,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (; cast<Instruction>(L) != J;) {
if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
// Move this instruction
@@ -2853,6 +2852,7 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
// Note: We cannot end the loop when we reach J because J could be moved
// farther down the use chain by another instruction pairing. Also, J
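
The repeated "if (I->mayWriteToMemory()) WriteSet.add(I);" additions in this file all close the same gap: when scanning forward from I, I's own store was not in the tracked write set, so a later read of that location could be treated as independent. A source-level illustration of the dependence that must be respected (plain C++, not the vectorizer's code):

    // J reads the location that I wrote; any pairing or reordering that moves
    // J above I, or fuses them without honouring the store, is unsound.
    int storeThenLoad(int *P) {
      *P = 1;          // I: writes memory
      int X = *P;      // J: depends on I's store
      return X;
    }
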
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a62fedc..5e75871 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -48,6 +48,7 @@
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -126,6 +127,9 @@ static const unsigned MaxVectorWidth = 64;
/// Maximum vectorization unroll count.
static const unsigned MaxUnrollFactor = 16;
+/// The cost of a loop that is considered 'small' by the unroller.
+static const unsigned SmallLoopCost = 20;
+
namespace {
// Forward declarations.
@@ -167,7 +171,9 @@ public:
updateAnalysis();
}
-private:
+ virtual ~InnerLoopVectorizer() {}
+
+protected:
/// A small list of PHINodes.
typedef SmallVector<PHINode*, 4> PhiVector;
/// When we unroll loops we have multiple vector values for each scalar.
@@ -187,7 +193,13 @@ private:
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
- void vectorizeLoop(LoopVectorizationLegality *Legal);
+ virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+ /// \brief The Loop exit block may have single value PHI nodes where the
+ /// incoming value is 'Undef'. While vectorizing we only handled real values
+ /// that were defined inside the loop. Here we fix the 'undef case'.
+ /// See PR14725.
+ void fixLCSSAPHIs();
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
@@ -201,16 +213,23 @@ private:
void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
PhiVector *PV);
+ /// Vectorize a single PHINode in a block. This method handles the induction
+ /// variable canonicalization. It supports both VF = 1 for unrolled loops and
+ /// arbitrary length vectors.
+ void widenPHIInstruction(Instruction *PN, VectorParts &Entry,
+ LoopVectorizationLegality *Legal,
+ unsigned UF, unsigned VF, PhiVector *PV);
+
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
/// This instruction is un-vectorizable. Implement it as a sequence
/// of scalars.
- void scalarizeInstruction(Instruction *Instr);
+ virtual void scalarizeInstruction(Instruction *Instr);
/// Vectorize Load and Store instructions,
- void vectorizeMemoryInstruction(Instruction *Instr,
+ virtual void vectorizeMemoryInstruction(Instruction *Instr,
LoopVectorizationLegality *Legal);
/// Create a broadcast instruction. This method generates a broadcast
@@ -218,12 +237,12 @@ private:
/// value. If this is the induction variable then we extend it to N, N+1, ...
/// this is needed because each iteration in the loop corresponds to a SIMD
/// element.
- Value *getBroadcastInstrs(Value *V);
+ virtual Value *getBroadcastInstrs(Value *V);
/// This function adds 0, 1, 2 ... to each vector element, starting at zero.
/// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
/// The sequence starts at StartIndex.
- Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
+ virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
@@ -233,7 +252,7 @@ private:
VectorParts &getVectorValue(Value *V);
/// Generate a shuffle sequence that will reverse the vector Vec.
- Value *reverseVector(Value *Vec);
+ virtual Value *reverseVector(Value *Vec);
/// This is a helper class that holds the vectorizer state. It maps scalar
/// instructions to vector instructions. When the code is 'unrolled' then
@@ -291,6 +310,8 @@ private:
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
unsigned VF;
+
+protected:
/// The vectorization unroll factor to use. Each scalar is vectorized to this
/// many different vector instructions.
unsigned UF;
@@ -326,6 +347,22 @@ private:
EdgeMaskCache MaskCache;
};
+class InnerLoopUnroller : public InnerLoopVectorizer {
+public:
+ InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, DataLayout *DL,
+ const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
+ InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
+
+private:
+ virtual void scalarizeInstruction(Instruction *Instr);
+ virtual void vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality *Legal);
+ virtual Value *getBroadcastInstrs(Value *V);
+ virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
+ virtual Value *reverseVector(Value *Vec);
+};
+
/// \brief Look for a meaningful debug location on the instruction or its
/// operands.
static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
@@ -409,7 +446,7 @@ public:
MRK_FloatMax
};
- /// This POD struct holds information about reduction variables.
+ /// This struct holds information about reduction variables.
struct ReductionDescriptor {
ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
@@ -446,8 +483,8 @@ public:
MinMaxReductionKind MinMaxKind;
};
- // This POD struct holds information about the memory runtime legality
- // check that a group of pointers do not overlap.
+ /// This struct holds information about the memory runtime legality
+ /// check that a group of pointers do not overlap.
struct RuntimePointerCheck {
RuntimePointerCheck() : Need(false) {}
@@ -457,6 +494,8 @@ public:
Pointers.clear();
Starts.clear();
Ends.clear();
+ IsWritePtr.clear();
+ DependencySetId.clear();
}
/// Insert a pointer and calculate the start and end SCEVs.
@@ -478,7 +517,7 @@ public:
SmallVector<unsigned, 2> DependencySetId;
};
- /// A POD for saving information about induction variables.
+ /// A struct for saving information about induction variables.
struct InductionInfo {
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
@@ -725,9 +764,9 @@ struct LoopVectorizeHints {
/// Vectorization unroll factor.
unsigned Unroll;
- LoopVectorizeHints(const Loop *L)
+ LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
: Width(VectorizationFactor)
- , Unroll(VectorizationUnroll)
+ , Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
, LoopID(L->getLoopID()) {
getHints(L);
// The command line options override any loop metadata except for when
@@ -736,6 +775,9 @@ struct LoopVectorizeHints {
Width = VectorizationFactor;
if (VectorizationUnroll.getNumOccurrences() > 0)
Unroll = VectorizationUnroll;
+
+ DEBUG(if (DisableUnrolling && Unroll == 1)
+ dbgs() << "LV: Unrolling disabled by the pass manager\n");
}
/// Return the loop vectorizer metadata prefix.
@@ -762,6 +804,7 @@ struct LoopVectorizeHints {
Vals.push_back(LoopID->getOperand(i));
Vals.push_back(createHint(Context, Twine(Prefix(), "width").str(), Width));
+ Vals.push_back(createHint(Context, Twine(Prefix(), "unroll").str(), 1));
MDNode *NewLoopID = MDNode::get(Context, Vals);
// Set operand 0 to refer to the loop id itself.
@@ -825,15 +868,18 @@ private:
unsigned Val = C->getZExtValue();
if (Hint == "width") {
- assert(isPowerOf2_32(Val) && Val <= MaxVectorWidth &&
- "Invalid width metadata");
- Width = Val;
+ if (isPowerOf2_32(Val) && Val <= MaxVectorWidth)
+ Width = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid width hint metadata\n");
} else if (Hint == "unroll") {
- assert(isPowerOf2_32(Val) && Val <= MaxUnrollFactor &&
- "Invalid unroll metadata");
- Unroll = Val;
- } else
- DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint);
+ if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor)
+ Unroll = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
+ } else {
+ DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
+ }
}
};
@@ -842,7 +888,8 @@ struct LoopVectorize : public LoopPass {
/// Pass identification, replacement for typeid
static char ID;
- explicit LoopVectorize() : LoopPass(ID) {
+ explicit LoopVectorize(bool NoUnrolling = false)
+ : LoopPass(ID), DisableUnrolling(NoUnrolling) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -852,6 +899,7 @@ struct LoopVectorize : public LoopPass {
TargetTransformInfo *TTI;
DominatorTree *DT;
TargetLibraryInfo *TLI;
+ bool DisableUnrolling;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -865,17 +913,22 @@ struct LoopVectorize : public LoopPass {
DT = &getAnalysis<DominatorTree>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ // If the target claims to have no vector registers don't attempt
+ // vectorization.
+ if (!TTI->getNumberOfRegisters(true))
+ return false;
+
if (DL == NULL) {
- DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout");
+ DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n");
return false;
}
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
- LoopVectorizeHints Hints(L);
+ LoopVectorizeHints Hints(L, DisableUnrolling);
- if (Hints.Width == 1) {
+ if (Hints.Width == 1 && Hints.Unroll == 1) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
@@ -912,19 +965,23 @@ struct LoopVectorize : public LoopPass {
unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
VF.Cost);
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
+ F->getParent()->getModuleIdentifier() << '\n');
+ DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
+
if (VF.Width == 1) {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- return false;
+ if (UF == 1)
+ return false;
+ // We decided not to vectorize, but we may want to unroll.
+ InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
+ Unroller.vectorize(&LVL);
+ } else {
+ // If we decided that it is *legal* to vectorize the loop then do it.
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ LB.vectorize(&LVL);
}
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
- F->getParent()->getModuleIdentifier()<<"\n");
- DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n");
-
- // If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
- LB.vectorize(&LVL);
-
// Mark the loop as already vectorized to avoid vectorizing again.
Hints.setAlreadyVectorized(L);
@@ -971,25 +1028,19 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
- // Save the current insertion location.
- Instruction *Loc = Builder.GetInsertPoint();
-
// We need to place the broadcast of invariant variables outside the loop.
Instruction *Instr = dyn_cast<Instruction>(V);
bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
// Place the code for broadcasting invariant variables in the new preheader.
+ IRBuilder<>::InsertPointGuard Guard(Builder);
if (Invariant)
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
// Broadcast the scalar into all locations in the vector.
Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
- // Restore the builder insertion point.
- if (Invariant)
- Builder.SetInsertPoint(Loc);
-
return Shuf;
}
@@ -1016,10 +1067,35 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
return Builder.CreateAdd(Val, Cv, "induction");
}
+/// \brief Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+static unsigned getGEPInductionOperand(DataLayout *DL,
+ const GetElementPtrInst *Gep) {
+ unsigned LastOperand = Gep->getNumOperands() - 1;
+ unsigned GEPAllocSize = DL->getTypeAllocSize(
+ cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
+
+ // Walk backwards and try to peel off zeros.
+ while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
+ // Find the type we're currently indexing into.
+ gep_type_iterator GEPTI = gep_type_begin(Gep);
+ std::advance(GEPTI, LastOperand - 1);
+
+ // If it's a type with the same allocation size as the result of the GEP we
+ // can peel off the zero index.
+ if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ break;
+ --LastOperand;
+ }
+
+ return LastOperand;
+}
+
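
getGEPInductionOperand above peels trailing zero indices so the stride check looks at the index that actually varies. In source terms (illustrative C++, not IR), both functions below advance by the induction variable, but only the first has it as the last GEP operand once lowered:

    // For a flat array the induction variable is the last GEP index:
    float *flat(float *A, long I)     { return &A[I]; }       // gep A, I
    // For &A[I][0] the GEP gains a trailing zero index (gep A, I, 0), so the
    // varying index is no longer last; the helper above still finds it.
    float *row(float (*A)[4], long I) { return &A[I][0]; }
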
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
// Make sure that the pointer does not point to structs.
- if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType())
+ if (Ptr->getType()->getPointerElementType()->isAggregateType())
return 0;
// If this value is a pointer induction variable we know it is consecutive.
@@ -1037,8 +1113,6 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return 0;
unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = Gep->getOperand(NumOperands - 1);
-
Value *GpPtr = Gep->getPointerOperand();
// If this GEP value is a consecutive pointer induction variable and all of
// the indices are constant then we know it is consecutive. We can
@@ -1062,14 +1136,18 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return -1;
}
- // Check that all of the gep indices are uniform except for the last.
- for (unsigned i = 0; i < NumOperands - 1; ++i)
- if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+
+ // Check that all of the gep indices are uniform except for our induction
+ // operand.
+ for (unsigned i = 0; i != NumOperands; ++i)
+ if (i != InductionOperand &&
+ !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
- // We can emit wide load/stores only if the last index is the induction
- // variable.
- const SCEV *Last = SE->getSCEV(LastIndex);
+ // We can emit wide load/stores only if the last non-zero index is the
+ // induction variable.
+ const SCEV *Last = SE->getSCEV(Gep->getOperand(InductionOperand));
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
const SCEV *Step = AR->getStepRecurrence(*SE);
@@ -1127,6 +1205,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+ // An alignment of 0 means target abi alignment. We need to use the scalar's
+ // target abi alignment in such a case.
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(ScalarDataTy);
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
@@ -1166,7 +1248,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
- unsigned LastOperand = NumOperands - 1;
+ unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -1175,9 +1257,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
// Update last index or loop invariant instruction anchored in loop.
- if (i == LastOperand ||
+ if (i == InductionOperand ||
(GepOperandInst && OrigLoop->contains(GepOperandInst))) {
- assert((i == LastOperand ||
+ assert((i == InductionOperand ||
SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
"Must be last index or loop invariant");
@@ -1301,7 +1383,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
- // Replace the operands of the cloned instrucions with extracted scalars.
+ // Replace the operands of the cloned instructions with extracted scalars.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *Op = Params[op][Part];
// Param is a vector. Need to extract the right lane.
@@ -1335,11 +1417,9 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
SmallVector<TrackingVH<Value> , 2> Starts;
SmallVector<TrackingVH<Value> , 2> Ends;
+ LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, "induction");
- // Use this type for pointer arithmetic.
- Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0);
-
for (unsigned i = 0; i < NumPointers; ++i) {
Value *Ptr = PtrRtCheck->Pointers[i];
const SCEV *Sc = SE->getSCEV(Ptr);
@@ -1350,7 +1430,11 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
- DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
@@ -1372,10 +1456,20 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
continue;
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc");
+ unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
+ unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
+
+ assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
+ (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
@@ -1390,9 +1484,8 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
// We have to do this trickery because the IRBuilder might fold the check to a
// constant expression in which case there is no Instruction anchored in a
// the block.
- LLVMContext &Ctx = Loc->getContext();
- Instruction * Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
- ConstantInt::getTrue(Ctx));
+ Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+ ConstantInt::getTrue(Ctx));
ChkBuilder.Insert(Check, "memcheck.conflict");
return Check;
}
@@ -1444,6 +1537,16 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+  // The exit count might have type i64 while the phi is i32. This can
+  // happen if we have an induction variable that is sign extended before the
+  // compare. The only way we get a backedge-taken count in that case is if the
+  // induction variable was signed and as such will not overflow, so
+  // truncation is legal.
+ if (ExitCount->getType()->getPrimitiveSizeInBits() >
+ IdxTy->getPrimitiveSizeInBits())
+ ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
+
+ ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
// Get the total trip count from the count by adding 1.
ExitCount = SE->getAddExpr(ExitCount,
SE->getConstant(ExitCount->getType(), 1));
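
Illustrative only (plain C++, not the SCEV API; widths and values are hypothetical): the type normalization plus the +1 above reduce to:

#include <cassert>
#include <cstdint>

static uint64_t tripCount(uint64_t BackedgeTakenCount, unsigned CountBits,
                          unsigned IdxBits) {
  assert(CountBits <= 64 && IdxBits > 0 && IdxBits <= 64);
  // A wider count is truncated to the induction variable's width (legal,
  // because a signed induction variable cannot have overflowed).
  if (CountBits > IdxBits && IdxBits < 64)
    BackedgeTakenCount &= (uint64_t(1) << IdxBits) - 1;
  // A narrower count is zero-extended, which leaves the value unchanged.
  return BackedgeTakenCount + 1; // trip count = backedge-taken count + 1
}
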
@@ -1496,7 +1599,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
- Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+ Builder.SetInsertPoint(VecBody->getFirstNonPHI());
// Generate the induction variable.
setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
@@ -1724,6 +1827,9 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
LoopExitBlock = ExitBlock;
LoopVectorBody = VecBody;
LoopScalarBody = OldBasicBlock;
+
+ LoopVectorizeHints Hints(Lp, true);
+ Hints.setAlreadyVectorized(Lp);
}
/// This function returns the identity element (or neutral element) for
@@ -1753,6 +1859,31 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
}
}
+static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
+ Intrinsic::ID ValidIntrinsicID) {
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return Intrinsic::not_intrinsic;
+
+ return ValidIntrinsicID;
+}
+
+static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
+ Intrinsic::ID ValidIntrinsicID) {
+ if (I.getNumArgOperands() != 2 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ I.getType() != I.getArgOperand(1)->getType() ||
+ !I.onlyReadsMemory())
+ return Intrinsic::not_intrinsic;
+
+ return ValidIntrinsicID;
+}
+
+
static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
@@ -1767,11 +1898,13 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
+ case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
+ case Intrinsic::round:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
@@ -1789,8 +1922,9 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
LibFunc::Func Func;
Function *F = CI->getCalledFunction();
// We're going to make assumptions on the semantics of the functions, check
- // that the target knows that it's available in this environment.
- if (!F || !TLI->getLibFunc(F->getName(), Func))
+ // that the target knows that it's available in this environment and it does
+ // not have local linkage.
+ if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
return Intrinsic::not_intrinsic;
// Otherwise check if we have a call to a function that can be turned into a
@@ -1801,59 +1935,67 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case LibFunc::sin:
case LibFunc::sinf:
case LibFunc::sinl:
- return Intrinsic::sin;
+ return checkUnaryFloatSignature(*CI, Intrinsic::sin);
case LibFunc::cos:
case LibFunc::cosf:
case LibFunc::cosl:
- return Intrinsic::cos;
+ return checkUnaryFloatSignature(*CI, Intrinsic::cos);
case LibFunc::exp:
case LibFunc::expf:
case LibFunc::expl:
- return Intrinsic::exp;
+ return checkUnaryFloatSignature(*CI, Intrinsic::exp);
case LibFunc::exp2:
case LibFunc::exp2f:
case LibFunc::exp2l:
- return Intrinsic::exp2;
+ return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
case LibFunc::log:
case LibFunc::logf:
case LibFunc::logl:
- return Intrinsic::log;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log);
case LibFunc::log10:
case LibFunc::log10f:
case LibFunc::log10l:
- return Intrinsic::log10;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log10);
case LibFunc::log2:
case LibFunc::log2f:
case LibFunc::log2l:
- return Intrinsic::log2;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log2);
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
- return Intrinsic::fabs;
+ return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
case LibFunc::floor:
case LibFunc::floorf:
case LibFunc::floorl:
- return Intrinsic::floor;
+ return checkUnaryFloatSignature(*CI, Intrinsic::floor);
case LibFunc::ceil:
case LibFunc::ceilf:
case LibFunc::ceill:
- return Intrinsic::ceil;
+ return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
- return Intrinsic::trunc;
+ return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
case LibFunc::rint:
case LibFunc::rintf:
case LibFunc::rintl:
- return Intrinsic::rint;
+ return checkUnaryFloatSignature(*CI, Intrinsic::rint);
case LibFunc::nearbyint:
case LibFunc::nearbyintf:
case LibFunc::nearbyintl:
- return Intrinsic::nearbyint;
+ return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::round);
case LibFunc::pow:
case LibFunc::powf:
case LibFunc::powl:
- return Intrinsic::pow;
+ return checkBinaryFloatSignature(*CI, Intrinsic::pow);
}
return Intrinsic::not_intrinsic;
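
A schematic of the gating that checkUnaryFloatSignature/checkBinaryFloatSignature add (hypothetical enum and field names, not the LLVM API): a libcall only maps to an intrinsic when arity, types, and read-only-ness line up.

#include <string>

enum class FPIntrinsic { None, Sin, Pow, CopySign };

struct CallShape {
  std::string Name;       // callee name, e.g. "sinf"
  unsigned NumArgs;       // number of arguments
  bool AllFloatSameType;  // all args and the result share one FP type
  bool ReadOnly;          // call only reads memory
};

static FPIntrinsic classify(const CallShape &C) {
  auto Unary = [&](FPIntrinsic ID) {
    return (C.NumArgs == 1 && C.AllFloatSameType && C.ReadOnly)
               ? ID : FPIntrinsic::None;
  };
  auto Binary = [&](FPIntrinsic ID) {
    return (C.NumArgs == 2 && C.AllFloatSameType && C.ReadOnly)
               ? ID : FPIntrinsic::None;
  };
  if (C.Name == "sin" || C.Name == "sinf" || C.Name == "sinl")
    return Unary(FPIntrinsic::Sin);
  if (C.Name == "pow" || C.Name == "powf" || C.Name == "powl")
    return Binary(FPIntrinsic::Pow);
  if (C.Name == "copysign" || C.Name == "copysignf" || C.Name == "copysignl")
    return Binary(FPIntrinsic::CopySign);
  return FPIntrinsic::None;
}
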
@@ -1925,6 +2067,54 @@ Value *createMinMaxOp(IRBuilder<> &Builder,
return Select;
}
+namespace {
+struct CSEDenseMapInfo {
+ static bool canHandle(Instruction *I) {
+ return isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I);
+ }
+ static inline Instruction *getEmptyKey() {
+ return DenseMapInfo<Instruction *>::getEmptyKey();
+ }
+ static inline Instruction *getTombstoneKey() {
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(Instruction *I) {
+ assert(canHandle(I) && "Unknown instruction!");
+ return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(),
+ I->value_op_end()));
+ }
+ static bool isEqual(Instruction *LHS, Instruction *RHS) {
+ if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
+ LHS == getTombstoneKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+};
+}
+
+///\brief Perform cse of induction variable instructions.
+static void cse(BasicBlock *BB) {
+ // Perform simple cse.
+ SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *In = I++;
+
+ if (!CSEDenseMapInfo::canHandle(In))
+ continue;
+
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
+ if (Instruction *V = CSEMap.lookup(In)) {
+ In->replaceAllUsesWith(V);
+ In->eraseFromParent();
+ continue;
+ }
+
+ CSEMap[In] = In;
+ }
+}
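
A self-contained model (hypothetical types, plain C++) of the cse() helper above: key each handled instruction by opcode plus operand list, and point later duplicates at the first occurrence.

#include <cstddef>
#include <unordered_map>
#include <vector>

struct Inst {
  unsigned Opcode;
  std::vector<int> Operands; // operand identities, stand-ins for Value*
};

struct InstHash {
  size_t operator()(const Inst &I) const {
    size_t H = I.Opcode;
    for (int Op : I.Operands)
      H = H * 31 + static_cast<size_t>(Op); // combine opcode and operands
    return H;
  }
};
struct InstEq {
  bool operator()(const Inst &A, const Inst &B) const {
    return A.Opcode == B.Opcode && A.Operands == B.Operands;
  }
};

// For each instruction, return the index of its canonical copy; a duplicate
// maps to the first identical instruction seen, mirroring replaceAllUsesWith.
static std::vector<size_t> cseIndices(const std::vector<Inst> &Block) {
  std::unordered_map<Inst, size_t, InstHash, InstEq> Seen;
  std::vector<size_t> Canon(Block.size());
  for (size_t i = 0; i != Block.size(); ++i) {
    auto It = Seen.find(Block[i]);
    Canon[i] = (It != Seen.end()) ? It->second : i;
    if (It == Seen.end())
      Seen.emplace(Block[i], i);
  }
  return Canon;
}
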
+
void
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
@@ -1995,18 +2185,31 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
// MinMax reductions have the start value as their identity.
- VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
- "minmax.ident");
+ if (VF == 1) {
+ VectorStart = Identity = RdxDesc.StartValue;
+ } else {
+ VectorStart = Identity = Builder.CreateVectorSplat(VF,
+ RdxDesc.StartValue,
+ "minmax.ident");
+ }
} else {
+ // Handle other reduction kinds:
Constant *Iden =
- LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
- VecTy->getScalarType());
- Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+ VecTy->getScalarType());
+ if (VF == 1) {
+ Identity = Iden;
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = RdxDesc.StartValue;
+ } else {
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+ }
}
// Fix the vector-loop phi.
@@ -2062,37 +2265,40 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
ReducedPartRdx, RdxParts[part]);
}
- // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
- // and vector ops, reducing the set of values being computed by half each
- // round.
- assert(isPowerOf2_32(VF) &&
- "Reduction emission only supported for pow2 vectors!");
- Value *TmpVec = ReducedPartRdx;
- SmallVector<Constant*, 32> ShuffleMask(VF, 0);
- for (unsigned i = VF; i != 1; i >>= 1) {
- // Move the upper half of the vector to the lower half.
- for (unsigned j = 0; j != i/2; ++j)
- ShuffleMask[j] = Builder.getInt32(i/2 + j);
-
- // Fill the rest of the mask with undef.
- std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
- UndefValue::get(Builder.getInt32Ty()));
-
- Value *Shuf =
+ if (VF > 1) {
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ Value *TmpVec = ReducedPartRdx;
+ SmallVector<Constant*, 32> ShuffleMask(VF, 0);
+ for (unsigned i = VF; i != 1; i >>= 1) {
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i/2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i/2 + j);
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
+ UndefValue::get(Builder.getInt32Ty()));
+
+ Value *Shuf =
Builder.CreateShuffleVector(TmpVec,
UndefValue::get(TmpVec->getType()),
ConstantVector::get(ShuffleMask),
"rdx.shuf");
- if (Op != Instruction::ICmp && Op != Instruction::FCmp)
- TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
- "bin.rdx");
- else
- TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
- }
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
+ else
+ TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
+ }
- // The result is in the first element of the vector.
- Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+ // The result is in the first element of the vector.
+ ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
+ Builder.getInt32(0));
+ }
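
A scalar model (plain C++, add reduction, hypothetical values) of the log2(VF) shuffle reduction above: each round folds the upper half onto the lower half, leaving the result in element 0.

#include <cassert>
#include <vector>

static int reducePow2(std::vector<int> V) {
  assert(!V.empty() && (V.size() & (V.size() - 1)) == 0 &&
         "emission only supports power-of-two VF");
  for (size_t Width = V.size(); Width != 1; Width /= 2)
    for (size_t j = 0; j != Width / 2; ++j)
      V[j] += V[j + Width / 2]; // the 'rdx.shuf' + 'bin.rdx' step
  return V[0];                  // same as the final extractelement at index 0
}
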
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
@@ -2101,7 +2307,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) continue;
+ if (!LCSSAPhi) break;
// All PHINodes need to have a single entry edge, or two if
// we already fixed them.
@@ -2111,7 +2317,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// incoming bypass edge.
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
// Add an edge coming from the bypass.
- LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
+ LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
}// end of the LCSSA phi scan.
@@ -2123,23 +2329,26 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
- (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
- // The Loop exit block may have single value PHI nodes where the incoming
- // value is 'undef'. While vectorizing we only handled real values that
- // were defined inside the loop. Here we handle the 'undef case'.
- // See PR14725.
+ fixLCSSAPHIs();
+
+ // Remove redundant induction instructions.
+ cse(LoopVectorBody);
+}
+
+void InnerLoopVectorizer::fixLCSSAPHIs() {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) continue;
+ if (!LCSSAPhi) break;
if (LCSSAPhi->getNumIncomingValues() == 1)
LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
LoopMiddleBlock);
}
-}
+}
InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
@@ -2200,161 +2409,185 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
return BlockMask;
}
-void
-InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
- BasicBlock *BB, PhiVector *PV) {
- // For each instruction in the old loop.
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- VectorParts &Entry = WidenMap.get(it);
- switch (it->getOpcode()) {
- case Instruction::Br:
- // Nothing to do for PHIs and BR, since we already took care of the
- // loop control flow instructions.
- continue;
- case Instruction::PHI:{
- PHINode* P = cast<PHINode>(it);
- // Handle reduction variables:
- if (Legal->getReductionVars()->count(P)) {
- for (unsigned part = 0; part < UF; ++part) {
- // This is phase one of vectorizing PHIs.
- Type *VecTy = VectorType::get(it->getType(), VF);
- Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
- LoopVectorBody-> getFirstInsertionPt());
- }
- PV->push_back(P);
- continue;
- }
+void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
+ InnerLoopVectorizer::VectorParts &Entry,
+ LoopVectorizationLegality *Legal,
+ unsigned UF, unsigned VF, PhiVector *PV) {
+ PHINode* P = cast<PHINode>(PN);
+ // Handle reduction variables:
+ if (Legal->getReductionVars()->count(P)) {
+ for (unsigned part = 0; part < UF; ++part) {
+ // This is phase one of vectorizing PHIs.
+ Type *VecTy = (VF == 1) ? PN->getType() :
+ VectorType::get(PN->getType(), VF);
+ Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
+ LoopVectorBody-> getFirstInsertionPt());
+ }
+ PV->push_back(P);
+ return;
+ }
- setDebugLocFromInst(Builder, P);
- // Check for PHI nodes that are lowered to vector selects.
- if (P->getParent() != OrigLoop->getHeader()) {
- // We know that all PHIs in non header blocks are converted into
- // selects, so we don't have to worry about the insertion order and we
- // can just use the builder.
- // At this point we generate the predication tree. There may be
- // duplications since this is a simple recursive scan, but future
- // optimizations will clean it up.
-
- unsigned NumIncoming = P->getNumIncomingValues();
-
- // Generate a sequence of selects of the form:
- // SELECT(Mask3, In3,
- // SELECT(Mask2, In2,
- // ( ...)))
- for (unsigned In = 0; In < NumIncoming; In++) {
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
- P->getParent());
- VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
-
- for (unsigned part = 0; part < UF; ++part) {
- // We might have single edge PHIs (blocks) - use an identity
- // 'select' for the first PHI operand.
- if (In == 0)
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
- In0[part]);
- else
- // Select between the current value and the previous incoming edge
- // based on the incoming mask.
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
- Entry[part], "predphi");
- }
- }
- continue;
+ setDebugLocFromInst(Builder, P);
+ // Check for PHI nodes that are lowered to vector selects.
+ if (P->getParent() != OrigLoop->getHeader()) {
+ // We know that all PHIs in non header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+
+ unsigned NumIncoming = P->getNumIncomingValues();
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+ P->getParent());
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+ for (unsigned part = 0; part < UF; ++part) {
+ // We might have single edge PHIs (blocks) - use an identity
+ // 'select' for the first PHI operand.
+ if (In == 0)
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ In0[part]);
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ Entry[part], "predphi");
}
+ }
+ return;
+ }
- // This PHINode must be an induction variable.
- // Make sure that we know about it.
- assert(Legal->getInductionVars()->count(P) &&
- "Not an induction variable");
-
- LoopVectorizationLegality::InductionInfo II =
- Legal->getInductionVars()->lookup(P);
-
- switch (II.IK) {
- case LoopVectorizationLegality::IK_NoInduction:
- llvm_unreachable("Unknown induction");
- case LoopVectorizationLegality::IK_IntInduction: {
- assert(P->getType() == II.StartValue->getType() && "Types must match");
- Type *PhiTy = P->getType();
- Value *Broadcasted;
- if (P == OldInduction) {
- // Handle the canonical induction variable. We might have had to
- // extend the type.
- Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
- } else {
- // Handle other induction variables that are now based on the
- // canonical one.
- Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
- "normalized.idx");
- NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
- Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
- "offset.idx");
- }
- Broadcasted = getBroadcastInstrs(Broadcasted);
- // After broadcasting the induction variable we need to make the vector
- // consecutive by adding 0, 1, 2, etc.
+ // This PHINode must be an induction variable.
+ // Make sure that we know about it.
+ assert(Legal->getInductionVars()->count(P) &&
+ "Not an induction variable");
+
+ LoopVectorizationLegality::InductionInfo II =
+ Legal->getInductionVars()->lookup(P);
+
+ switch (II.IK) {
+ case LoopVectorizationLegality::IK_NoInduction:
+ llvm_unreachable("Unknown induction");
+ case LoopVectorizationLegality::IK_IntInduction: {
+ assert(P->getType() == II.StartValue->getType() && "Types must match");
+ Type *PhiTy = P->getType();
+ Value *Broadcasted;
+ if (P == OldInduction) {
+ // Handle the canonical induction variable. We might have had to
+ // extend the type.
+ Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
+ } else {
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
+ "normalized.idx");
+ NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
+ Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
+ "offset.idx");
+ }
+ Broadcasted = getBroadcastInstrs(Broadcasted);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding 0, 1, 2, etc.
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
+ return;
+ }
+ case LoopVectorizationLegality::IK_ReverseIntInduction:
+ case LoopVectorizationLegality::IK_PtrInduction:
+ case LoopVectorizationLegality::IK_ReversePtrInduction:
+ // Handle reverse integer and pointer inductions.
+ Value *StartIdx = ExtendedIdx;
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+ "normalized.idx");
+
+ // Handle the reverse integer induction variable case.
+ if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
+ IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
+ Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
+ "resize.norm.idx");
+ Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
+ "reverse.idx");
+
+ // This is a new value so do not hoist it out.
+ Value *Broadcasted = getBroadcastInstrs(ReverseInd);
+ // After broadcasting the induction variable we need to make the
+ // vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
- continue;
+ Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
+ true);
+ return;
}
- case LoopVectorizationLegality::IK_ReverseIntInduction:
- case LoopVectorizationLegality::IK_PtrInduction:
- case LoopVectorizationLegality::IK_ReversePtrInduction:
- // Handle reverse integer and pointer inductions.
- Value *StartIdx = ExtendedIdx;
- // This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
- "normalized.idx");
- // Handle the reverse integer induction variable case.
- if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
- IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
- Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
- "resize.norm.idx");
- Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
- "reverse.idx");
-
- // This is a new value so do not hoist it out.
- Value *Broadcasted = getBroadcastInstrs(ReverseInd);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding ... -3, -2, -1, 0.
- for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
- true);
+ // Handle the pointer induction variable case.
+ assert(P->getType()->isPointerTy() && "Unexpected type.");
+
+    // Is this a reverse induction ptr or a consecutive induction ptr?
+ bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
+ II.IK);
+
+ // This is the vector of results. Notice that we don't generate
+ // vector geps because scalar geps result in better code.
+ for (unsigned part = 0; part < UF; ++part) {
+ if (VF == 1) {
+ int EltIndex = (part) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (Reverse)
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+ else
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+
+ Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+ "next.gep");
+ Entry[part] = SclrGep;
continue;
}
- // Handle the pointer induction variable case.
- assert(P->getType()->isPointerTy() && "Unexpected type.");
-
- // Is this a reverse induction ptr or a consecutive induction ptr.
- bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
- II.IK);
-
- // This is the vector of results. Notice that we don't generate
- // vector geps because scalar geps result in better code.
- for (unsigned part = 0; part < UF; ++part) {
- Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
- for (unsigned int i = 0; i < VF; ++i) {
- int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
- Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
- Value *GlobalIdx;
- if (!Reverse)
- GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
- else
- GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
-
- Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
- "next.gep");
- VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
- Builder.getInt32(i),
- "insert.gep");
- }
- Entry[part] = VecVal;
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (!Reverse)
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+ else
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+
+ Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+ "next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i),
+ "insert.gep");
}
- continue;
+ Entry[part] = VecVal;
}
+ return;
+ }
+}
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+ BasicBlock *BB, PhiVector *PV) {
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ VectorParts &Entry = WidenMap.get(it);
+ switch (it->getOpcode()) {
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ continue;
+ case Instruction::PHI:{
+ // Vectorize PHINodes.
+ widenPHIInstruction(it, Entry, Legal, UF, VF, PV);
+ continue;
}// End of PHI.
case Instruction::Add:
@@ -2413,8 +2646,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
VectorParts &Cond = getVectorValue(it->getOperand(0));
VectorParts &Op0 = getVectorValue(it->getOperand(1));
VectorParts &Op1 = getVectorValue(it->getOperand(2));
- Value *ScalarCond = Builder.CreateExtractElement(Cond[0],
- Builder.getInt32(0));
+
+ Value *ScalarCond = (VF == 1) ? Cond[0] :
+ Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
+
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part] = Builder.CreateSelect(
InvariantCond ? ScalarCond : Cond[Part],
@@ -2475,7 +2710,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
break;
}
/// Vectorize casts.
- Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+ Type *DestTy = (VF == 1) ? CI->getType() :
+ VectorType::get(CI->getType(), VF);
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
@@ -2505,7 +2741,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
- Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
+ Type *Tys[] = {CI->getType()};
+ if (VF > 1)
+ Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+
Function *F = Intrinsic::getDeclaration(M, ID, Tys);
Entry[Part] = Builder.CreateCall(F, Args);
}
@@ -2542,19 +2781,36 @@ void InnerLoopVectorizer::updateAnalysis() {
DEBUG(DT->verifyAnalysis());
}
+/// \brief Check whether it is safe to if-convert this phi node.
+///
+/// Phi nodes with constant expressions that can trap are not safe to if
+/// convert.
+static bool canIfConvertPHINodes(BasicBlock *BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (!Phi)
+ return true;
+ for (unsigned p = 0, e = Phi->getNumIncomingValues(); p != e; ++p)
+ if (Constant *C = dyn_cast<Constant>(Phi->getIncomingValue(p)))
+ if (C->canTrap())
+ return false;
+ }
+ return true;
+}
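
As a concrete (illustrative, not from the patch) case of what canIfConvertPHINodes rejects: a phi whose incoming value is the constant expression sdiv (i32 1, i32 0) can trap when evaluated, and if-conversion would lower the phi to an unconditionally executed select, so blocks containing such phis are left unpredicated.
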
+
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion)
return false;
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
- std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
// A list of pointers that we can safely read and write to.
SmallPtrSet<Value *, 8> SafePointes;
// Collect safe addresses.
- for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
- BasicBlock *BB = LoopBlocks[i];
+ for (Loop::block_iterator BI = TheLoop->block_begin(),
+ BE = TheLoop->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = *BI;
if (blockNeedsPredication(BB))
continue;
@@ -2568,16 +2824,22 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
}
// Collect the blocks that need predication.
- for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
- BasicBlock *BB = LoopBlocks[i];
+ BasicBlock *Header = TheLoop->getHeader();
+ for (Loop::block_iterator BI = TheLoop->block_begin(),
+ BE = TheLoop->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator()))
return false;
// We must be able to predicate all blocks that need to be predicated.
- if (blockNeedsPredication(BB) && !blockCanBePredicated(BB, SafePointes))
+ if (blockNeedsPredication(BB)) {
+ if (!blockCanBePredicated(BB, SafePointes))
+ return false;
+ } else if (BB != Header && !canIfConvertPHINodes(BB))
return false;
+
}
// We can if-convert this loop.
@@ -2602,19 +2864,17 @@ bool LoopVectorizationLegality::canVectorize() {
if (!TheLoop->getExitingBlock())
return false;
- unsigned NumBlocks = TheLoop->getNumBlocks();
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LV: Found a loop: " <<
+ TheLoop->getHeader()->getName() << '\n');
// Check if we can if-convert non single-bb loops.
+ unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
return false;
}
- // We need to have a loop header.
- BasicBlock *Latch = TheLoop->getLoopLatch();
- DEBUG(dbgs() << "LV: Found a loop: " <<
- TheLoop->getHeader()->getName() << "\n");
-
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
@@ -2623,6 +2883,7 @@ bool LoopVectorizationLegality::canVectorize() {
}
// Do not loop-vectorize loops with a tiny trip count.
+ BasicBlock *Latch = TheLoop->getLoopLatch();
unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
if (TC > 0u && TC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
@@ -2657,7 +2918,13 @@ bool LoopVectorizationLegality::canVectorize() {
static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
if (Ty->isPointerTy())
- return DL.getIntPtrType(Ty->getContext());
+ return DL.getIntPtrType(Ty);
+
+  // It is possible that chars or shorts overflow when we ask for the loop's
+  // trip count; work around this by changing the type size.
+ if (Ty->getScalarSizeInBits() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+
return Ty;
}
@@ -2682,7 +2949,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
Instruction *U = cast<Instruction>(*I);
// This user may be a reduction exit value.
if (!TheLoop->contains(U)) {
- DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ DEBUG(dbgs() << "LV: Found an outside user for : " << *U << '\n');
return true;
}
}
@@ -2758,6 +3025,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found an induction variable.\n");
Inductions[Phi] = InductionInfo(StartValue, IK);
+
+ // Until we explicitly handle the case of an induction variable with
+ // an outside loop user we have to give up vectorizing this loop.
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ return false;
+
continue;
}
@@ -2812,9 +3085,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
// Check that the instruction return type is vectorizable.
- if (!VectorType::isValidElementType(it->getType()) &&
- !it->getType()->isVoidTy()) {
- DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+ // Also, we can't vectorize extractelement instructions.
+ if ((!VectorType::isValidElementType(it->getType()) &&
+ !it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+ DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
}
@@ -2904,7 +3178,7 @@ public:
/// non-intersection.
bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop);
+ Loop *TheLoop, bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
@@ -2918,6 +3192,7 @@ public:
bool isRTCheckNeeded() { return IsRTCheckNeeded; }
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+ void resetDepChecks() { CheckDeps.clear(); }
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
@@ -2972,10 +3247,15 @@ static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) {
return AR->isAffine();
}
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
+ const Loop *Lp);
+
bool AccessAnalysis::canCheckPtrAtRT(
LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop) {
+ Loop *TheLoop, bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
unsigned NumReadPtrChecks = 0;
@@ -3003,7 +3283,10 @@ bool AccessAnalysis::canCheckPtrAtRT(
else
++NumReadPtrChecks;
- if (hasComputableBounds(SE, Ptr)) {
+ if (hasComputableBounds(SE, Ptr) &&
+ // When we run after a failing dependency check we have to make sure we
+ // don't have wrapping pointers.
+ (!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) {
// The id of the dependence set.
unsigned DepId;
@@ -3019,7 +3302,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr <<"\n");
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
} else {
CanDoRT = false;
}
@@ -3027,9 +3310,36 @@ bool AccessAnalysis::canCheckPtrAtRT(
if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
NumComparisons = 0; // Only one dependence set.
- else
+ else {
NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks +
NumWritePtrChecks - 1));
+ }
+
+ // If the pointers that we would use for the bounds comparison have different
+ // address spaces, assume the values aren't directly comparable, so we can't
+ // use them for the runtime check. We also have to assume they could
+ // overlap. In the future there should be metadata for whether address spaces
+ // are disjoint.
+ unsigned NumPointers = RtCheck.Pointers.size();
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i + 1; j < NumPointers; ++j) {
+ // Only need to check pointers between two different dependency sets.
+ if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ continue;
+
+ Value *PtrI = RtCheck.Pointers[i];
+ Value *PtrJ = RtCheck.Pointers[j];
+
+ unsigned ASi = PtrI->getType()->getPointerAddressSpace();
+ unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
+ if (ASi != ASj) {
+ DEBUG(dbgs() << "LV: Runtime check would require comparison between"
+ " different address spaces\n");
+ return false;
+ }
+ }
+ }
+
return CanDoRT;
}
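
A hypothetical worked example for the comparison count computed above: with 2 write pointers and 3 read pointers spread across more than one dependency set, NumComparisons = 2 * (3 + 2 - 1) = 8 pairwise bound checks, while a single dependency set short-circuits it to 0. The new address-space scan then gives up on the runtime check entirely if any cross-set pair of pointers lives in different address spaces, since their bounds cannot be compared directly.
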
@@ -3084,7 +3394,7 @@ void AccessAnalysis::processMemAccesses(bool UseDeferred) {
!isa<Argument>(UnderlyingObj)) &&
!isIdentifiedObject(UnderlyingObj))) {
DEBUG(dbgs() << "LV: Found an unidentified " <<
- (IsWrite ? "write" : "read" ) << " ptr:" << *UnderlyingObj <<
+ (IsWrite ? "write" : "read" ) << " ptr: " << *UnderlyingObj <<
"\n");
IsRTCheckNeeded = (IsRTCheckNeeded ||
!isIdentifiedObject(UnderlyingObj) ||
@@ -3158,8 +3468,9 @@ public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
- MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L) :
- SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0) {}
+ MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L)
+ : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
+ ShouldRetryWithRuntimeCheck(false) {}
/// \brief Register the location (instructions are given increasing numbers)
/// of a write access.
@@ -3189,6 +3500,10 @@ public:
/// the accesses safely with.
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+  /// \brief In some cases when the dependency check fails we can still
+ /// vectorize the loop with a dynamic array access check.
+ bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+
private:
ScalarEvolution *SE;
DataLayout *DL;
@@ -3206,6 +3521,10 @@ private:
// We can access this many bytes in parallel safely.
unsigned MaxSafeDepDistBytes;
+ /// \brief If we see a non constant dependence distance we can still try to
+ /// vectorize this loop with runtime checks.
+ bool ShouldRetryWithRuntimeCheck;
+
/// \brief Check whether there is a plausible dependence between the two
/// accesses.
///
@@ -3403,6 +3722,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
return true;
}
@@ -3428,7 +3748,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (Val == 0) {
if (ATy == BTy)
return false;
- DEBUG(dbgs() << "LV: Zero dependence difference but different types");
+ DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
return true;
}
@@ -3437,7 +3757,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Positive distance bigger than max vectorization factor.
if (ATy != BTy) {
DEBUG(dbgs() <<
- "LV: ReadWrite-Write positive dependency with different types");
+ "LV: ReadWrite-Write positive dependency with different types\n");
return false;
}
@@ -3454,7 +3774,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2*TypeByteSize > MaxSafeDepDistBytes ||
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
DEBUG(dbgs() << "LV: Failure because of Positive distance "
- << Val.getSExtValue() << "\n");
+ << Val.getSExtValue() << '\n');
return true;
}
@@ -3467,7 +3787,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return true;
DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
- " with max VF=" << MaxSafeDepDistBytes/TypeByteSize << "\n");
+ " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
return false;
}
@@ -3571,8 +3891,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Stores.push_back(St);
DepChecker.addAccess(St);
}
- } // next instr.
- } // next block.
+ } // Next instr.
+ } // Next block.
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
@@ -3619,7 +3939,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
- SmallPtrSet<Value *, 16> ReadOnlyPtr;
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
LoadInst *LD = cast<LoadInst>(*I);
Value* Ptr = LD->getPointerOperand();
@@ -3667,7 +3986,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (NumComparisons == 0 && NeedRTCheck)
NeedRTCheck = false;
- // Check that we did not collect too many pointers or found a unsizeable
+ // Check that we did not collect too many pointers or found an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
PtrRtCheck.reset();
@@ -3693,9 +4012,32 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
Accesses.getDependenciesToCheck());
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LV: Retrying with memory checks\n");
+ NeedRTCheck = true;
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks();
+
+ PtrRtCheck.reset();
+ PtrRtCheck.Need = true;
+
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ TheLoop, true);
+ // Check that we did not collect too many pointers or found an unsizeable
+ // pointer.
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
+ PtrRtCheck.reset();
+ return false;
+ }
+
+ CanVecMem = true;
+ }
}
- DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") <<
+ DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
return CanVecMem;
@@ -3839,6 +4181,12 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
if (ExitInstruction != 0 || Cur == Phi)
return false;
+ // The instruction used by an outside user must be the last instruction
+      // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+ return false;
+
ExitInstruction = Cur;
continue;
}
@@ -4045,6 +4393,14 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
if (it->mayWriteToMemory() || it->mayThrow())
return false;
+ // Check that we don't have a constant expression that can trap as operand.
+ for (Instruction::op_iterator OI = it->op_begin(), OE = it->op_end();
+ OI != OE; ++OI) {
+ if (Constant *C = dyn_cast<Constant>(*OI))
+ if (C->canTrap())
+ return false;
+ }
+
// The instructions below can trap.
switch (it->getOpcode()) {
default: continue;
@@ -4071,7 +4427,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
// Find the trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
- DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+ DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
unsigned WidestType = getWidestType();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
@@ -4082,7 +4438,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
- DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
+ DEBUG(dbgs() << "LV: The Widest register is: "
+ << WidestRegister << " bits.\n");
if (MaxVectorSize == 0) {
DEBUG(dbgs() << "LV: The target has no vector registers.\n");
@@ -4118,7 +4475,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
if (UserVF != 0) {
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
- DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+ DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
Factor.Width = UserVF;
return Factor;
@@ -4126,13 +4483,13 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
float Cost = expectedCost(1);
unsigned Width = 1;
- DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+ DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
for (unsigned i=2; i <= VF; i*=2) {
// Notice that the vector loop needs to be executed less times, so
// we need to divide the cost of the vector loops by the width of
// the vector elements.
float VectorCost = expectedCost(i) / (float)i;
- DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+ DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
(int)VectorCost << ".\n");
if (VectorCost < Cost) {
Cost = VectorCost;
@@ -4256,8 +4613,20 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
else if (UF < 1)
UF = 1;
- if (Legal->getReductionVars()->size()) {
- DEBUG(dbgs() << "LV: Unrolling because of reductions. \n");
+ bool HasReductions = Legal->getReductionVars()->size();
+
+ // Decide if we want to unroll if we decided that it is legal to vectorize
+ // but not profitable.
+ if (VF == 1) {
+ if (TheLoop->getNumBlocks() > 1 || !HasReductions ||
+ LoopCost > SmallLoopCost)
+ return 1;
+
+ return UF;
+ }
+
+ if (HasReductions) {
+ DEBUG(dbgs() << "LV: Unrolling because of reductions.\n");
return UF;
}
@@ -4265,14 +4634,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and unroll until the cost of the
// loop overhead is about 5% of the cost of the loop.
- DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n");
- if (LoopCost < 20) {
- DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. \n");
- unsigned NewUF = 20/LoopCost + 1;
+ DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
+ if (LoopCost < SmallLoopCost) {
+ DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n");
+ unsigned NewUF = SmallLoopCost / (LoopCost + 1);
return std::min(NewUF, UF);
}
- DEBUG(dbgs() << "LV: Not Unrolling. \n");
+ DEBUG(dbgs() << "LV: Not Unrolling.\n");
return 1;
}
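
Hypothetical numbers to make the unrolling heuristic concrete (assuming SmallLoopCost keeps the old literal threshold of 20): a vectorized loop with LoopCost = 4 gets NewUF = 20 / (4 + 1) = 4, clamped by std::min(NewUF, UF); with VF == 1 the same UF is returned only for single-block reduction loops whose cost is at or below the threshold.
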
@@ -4373,16 +4742,16 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
MaxUsage = std::max(MaxUsage, OpenIntervals.size());
DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
- OpenIntervals.size() <<"\n");
+ OpenIntervals.size() << '\n');
// Add the current instruction to the list of open intervals.
OpenIntervals.insert(I);
}
unsigned Invariant = LoopInvariants.size();
- DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << " \n");
- DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << " \n");
- DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << " \n");
+ DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n');
+ DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
+ DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n');
R.LoopInvariantRegs = Invariant;
R.MaxLocalUsers = MaxUsage;
@@ -4406,8 +4775,8 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
unsigned C = getInstructionCost(it, VF);
BlockCost += C;
- DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
- VF << " For instruction: "<< *it << "\n");
+ DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " <<
+ VF << " For instruction: " << *it << '\n');
}
// We assume that if-converted blocks have a 50% chance of being executed.
@@ -4669,13 +5038,16 @@ char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
- Pass *createLoopVectorizePass() {
- return new LoopVectorize();
+ Pass *createLoopVectorizePass(bool NoUnrolling) {
+ return new LoopVectorize(NoUnrolling);
}
}
@@ -4690,3 +5062,96 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
return false;
}
+
+
+void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
+ assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+ // Holds vector parameters or scalars, in case of uniform vals.
+ SmallVector<VectorParts, 4> Params;
+
+ setDebugLocFromInst(Builder, Instr);
+
+ // Find all of the vectorized parameters.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *SrcOp = Instr->getOperand(op);
+
+ // If we are accessing the old induction variable, use the new one.
+ if (SrcOp == OldInduction) {
+ Params.push_back(getVectorValue(SrcOp));
+ continue;
+ }
+
+ // Try using previously calculated values.
+ Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
+
+ // If the src is an instruction that appeared earlier in the basic block
+ // then it should already be vectorized.
+ if (SrcInst && OrigLoop->contains(SrcInst)) {
+ assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
+ // The parameter is a vector value from earlier.
+ Params.push_back(WidenMap.get(SrcInst));
+ } else {
+ // The parameter is a scalar from outside the loop. Maybe even a constant.
+ VectorParts Scalars;
+ Scalars.append(UF, SrcOp);
+ Params.push_back(Scalars);
+ }
+ }
+
+ assert(Params.size() == Instr->getNumOperands() &&
+ "Invalid number of operands");
+
+  // Does this instruction return a value?
+ bool IsVoidRetTy = Instr->getType()->isVoidTy();
+
+ Value *UndefVec = IsVoidRetTy ? 0 :
+ UndefValue::get(Instr->getType());
+ // Create a new entry in the WidenMap and initialize it to Undef or Null.
+ VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
+
+ // For each vector unroll 'part':
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // For each scalar that we create:
+
+ Instruction *Cloned = Instr->clone();
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+ // Replace the operands of the cloned instructions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op][Part];
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder.Insert(Cloned);
+
+ // If the original scalar returns a value we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults[Part] = Cloned;
+ }
+}
+
+void
+InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality*) {
+ return scalarizeInstruction(Instr);
+}
+
+Value *InnerLoopUnroller::reverseVector(Value *Vec) {
+ return Vec;
+}
+
+Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
+ return V;
+}
+
+Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
+ bool Negate) {
+ // When unrolling and the VF is 1, we only need to add a simple scalar.
+ Type *ITy = Val->getType();
+ assert(!ITy->isVectorTy() && "Val must be a scalar");
+ Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
+ return Builder.CreateAdd(Val, C, "induction");
+}
+
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9312b4b..c72b51f 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -25,8 +25,8 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -49,34 +49,24 @@ static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
"number "));
+
+static cl::opt<bool>
+ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
+ cl::desc("Attempt to vectorize horizontal reductions"));
+
+static cl::opt<bool> ShouldStartVectorizeHorAtStore(
+ "slp-vectorize-hor-store", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Attempt to vectorize horizontal reductions feeding into a store"));
+
namespace {
static const unsigned MinVecRegSize = 128;
static const unsigned RecursionMaxDepth = 12;
-/// RAII pattern to save the insertion point of the IR builder.
-class BuilderLocGuard {
-public:
- BuilderLocGuard(IRBuilder<> &B) : Builder(B), Loc(B.GetInsertPoint()),
- DbgLoc(B.getCurrentDebugLocation()) {}
- ~BuilderLocGuard() {
- Builder.SetCurrentDebugLocation(DbgLoc);
- if (Loc)
- Builder.SetInsertPoint(Loc);
- }
-
-private:
- // Prevent copying.
- BuilderLocGuard(const BuilderLocGuard &);
- BuilderLocGuard &operator=(const BuilderLocGuard &);
- IRBuilder<> &Builder;
- AssertingVH<Instruction> Loc;
- DebugLoc DbgLoc;
-};
-
-/// A helper class for numbering instructions in multible blocks.
-/// Numbers starts at zero for each basic block.
+/// A helper class for numbering instructions in multiple blocks.
+/// Numbers start at zero for each basic block.
struct BlockNumbering {
BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {}
@@ -173,6 +163,37 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
return Opcode;
}
+/// \returns \p I after propagating metadata from \p VL.
+static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
+ Instruction *I0 = cast<Instruction>(VL[0]);
+ SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
+ I0->getAllMetadataOtherThanDebugLoc(Metadata);
+
+ for (unsigned i = 0, n = Metadata.size(); i != n; ++i) {
+ unsigned Kind = Metadata[i].first;
+ MDNode *MD = Metadata[i].second;
+
+ for (int i = 1, e = VL.size(); MD && i != e; i++) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ MDNode *IMD = I->getMetadata(Kind);
+
+ switch (Kind) {
+ default:
+ MD = 0; // Remove unknown metadata
+ break;
+ case LLVMContext::MD_tbaa:
+ MD = MDNode::getMostGenericTBAA(MD, IMD);
+ break;
+ case LLVMContext::MD_fpmath:
+ MD = MDNode::getMostGenericFPMath(MD, IMD);
+ break;
+ }
+ }
+ I->setMetadata(Kind, MD);
+ }
+ return I;
+}
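
A schematic (plain C++, string-keyed stand-in for MDNode kinds) of propagateMetadata above: a kind survives on the combined instruction only when every scalar agrees on it; real LLVM additionally merges the known kinds (TBAA, fpmath) via getMostGeneric* rather than requiring exact equality.

#include <map>
#include <string>
#include <vector>

using MDMap = std::map<std::string, std::string>; // kind -> payload

static MDMap propagate(const std::vector<MDMap> &Scalars) {
  if (Scalars.empty())
    return MDMap();
  MDMap Result = Scalars.front();          // start from the first scalar
  for (size_t i = 1; i != Scalars.size(); ++i)
    for (auto It = Result.begin(); It != Result.end();) {
      auto Found = Scalars[i].find(It->first);
      if (Found == Scalars[i].end() || Found->second != It->second)
        It = Result.erase(It);             // disagreement: drop the kind
      else
        ++It;
    }
  return Result;
}
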
+
/// \returns The type that all of the values in \p VL have or null if there
/// are different types.
static Type* getSameType(ArrayRef<Value *> VL) {
@@ -216,6 +237,104 @@ static bool CanReuseExtract(ArrayRef<Value *> VL) {
return true;
}
+static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+
+ SmallVector<Value *, 16> OrigLeft, OrigRight;
+
+ bool AllSameOpcodeLeft = true;
+ bool AllSameOpcodeRight = true;
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+
+ OrigLeft.push_back(V0);
+ OrigRight.push_back(V1);
+
+ Instruction *I0 = dyn_cast<Instruction>(V0);
+ Instruction *I1 = dyn_cast<Instruction>(V1);
+
+ // Check whether all operands on one side have the same opcode. In this case
+ // we want to preserve the original order and not make things worse by
+ // reordering.
+ AllSameOpcodeLeft = I0;
+ AllSameOpcodeRight = I1;
+
+ if (i && AllSameOpcodeLeft) {
+ if(Instruction *P0 = dyn_cast<Instruction>(OrigLeft[i-1])) {
+ if(P0->getOpcode() != I0->getOpcode())
+ AllSameOpcodeLeft = false;
+ } else
+ AllSameOpcodeLeft = false;
+ }
+ if (i && AllSameOpcodeRight) {
+ if(Instruction *P1 = dyn_cast<Instruction>(OrigRight[i-1])) {
+ if(P1->getOpcode() != I1->getOpcode())
+ AllSameOpcodeRight = false;
+ } else
+ AllSameOpcodeRight = false;
+ }
+
+ // Sort two opcodes. In the code below we try to preserve the ability to use
+ // broadcast of values instead of individual inserts.
+ // vl1 = load
+ // vl2 = phi
+ // vr1 = load
+ // vr2 = vr2
+ // = vl1 x vr1
+ // = vl2 x vr2
+    // If we just sorted according to opcode we would leave the first line
+    // intact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
+ // = vl1 x vr1
+ // = vr2 x vl2
+    // Because vr2 and vr1 are from the same load we lose the opportunity of a
+ // broadcast for the packed right side in the backend: we have [vr1, vl2]
+ // instead of [vr1, vr2=vr1].
+ if (I0 && I1) {
+ if(!i && I0->getOpcode() > I1->getOpcode()) {
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() > I1->getOpcode() && Right[i-1] != I1) {
+        // Try not to destroy a broadcast for no apparent benefit.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() == I1->getOpcode() && Right[i-1] == I0) {
+ // Try preserve broadcasts.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() == I1->getOpcode() && Left[i-1] == I1) {
+ // Try preserve broadcasts.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else {
+ Left.push_back(I0);
+ Right.push_back(I1);
+ }
+ continue;
+ }
+ // One opcode, put the instruction on the right.
+ if (I0) {
+ Left.push_back(V1);
+ Right.push_back(I0);
+ continue;
+ }
+ Left.push_back(V0);
+ Right.push_back(V1);
+ }
+
+ bool LeftBroadcast = isSplat(Left);
+ bool RightBroadcast = isSplat(Right);
+
+  // Don't reorder if the operands were good to begin with.
+ if (!(LeftBroadcast || RightBroadcast) &&
+ (AllSameOpcodeRight || AllSameOpcodeLeft)) {
+ Left = OrigLeft;
+ Right = OrigRight;
+ }
+}
+
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
public:
@@ -238,17 +357,20 @@ public:
}
/// \brief Vectorize the tree that starts with the elements in \p VL.
- void vectorizeTree();
+ /// Returns the vectorized root.
+ Value *vectorizeTree();
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
int getTreeCost();
- /// Construct a vectorizable tree that starts at \p Roots.
- void buildTree(ArrayRef<Value *> Roots);
+ /// Construct a vectorizable tree that starts at \p Roots and is possibly
+ /// used by a reduction of \p RdxOps.
+ void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
+ RdxOps = 0;
VectorizableTree.clear();
ScalarToTreeEntry.clear();
MustGather.clear();
@@ -278,7 +400,7 @@ private:
/// \returns the pointer to the vectorized value if \p VL is already
/// vectorized, or NULL. They may happen in cycles.
- Value *alreadyVectorized(ArrayRef<Value *> VL);
+ Value *alreadyVectorized(ArrayRef<Value *> VL) const;
/// \brief Take the pointer operand from the Load/Store instruction.
/// \returns NULL if this is not a valid Load/Store instruction.
@@ -305,26 +427,31 @@ private:
/// \returns the pointer to the barrier instruction if we can't sink.
Value *getSinkBarrier(Instruction *Src, Instruction *Dst);
- /// \returns the index of the last instrucion in the BB from \p VL.
+ /// \returns the index of the last instruction in the BB from \p VL.
int getLastIndex(ArrayRef<Value *> VL);
- /// \returns the Instrucion in the bundle \p VL.
+ /// \returns the Instruction in the bundle \p VL.
Instruction *getLastInstruction(ArrayRef<Value *> VL);
+ /// \brief Set the Builder insert point to one after the last instruction in
+ /// the bundle.
+ void setInsertPointAfterBundle(ArrayRef<Value *> VL);
+
/// \returns a vector from a collection of scalars in \p VL.
Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
+ /// \returns whether the VectorizableTree is fully vectorizable and will
+ /// be beneficial even if the tree height is tiny.
+ bool isFullyVectorizableTinyTree();
+
struct TreeEntry {
TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
NeedToGather(0) {}
/// \returns true if the scalars in VL are equal to this entry.
- bool isSame(ArrayRef<Value *> VL) {
+ bool isSame(ArrayRef<Value *> VL) const {
assert(VL.size() == Scalars.size() && "Invalid size");
- for (int i = 0, e = VL.size(); i != e; ++i)
- if (VL[i] != Scalars[i])
- return false;
- return true;
+ return std::equal(VL.begin(), VL.end(), Scalars.begin());
}
/// A vector of scalars.
@@ -393,10 +520,15 @@ private:
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherSeq;
+ /// A list of blocks that we are going to CSE.
+ SmallSet<BasicBlock *, 8> CSEBlocks;
/// Numbers instructions in different blocks.
DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
+ /// Reduction operators.
+ ValueSet *RdxOps;
+
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
@@ -409,8 +541,9 @@ private:
IRBuilder<> Builder;
};
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
deleteTree();
+ RdxOps = Rdx;
if (!getSameType(Roots))
return;
buildTree_rec(Roots, 0);
@@ -431,18 +564,20 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
UE = Scalar->use_end(); User != UE; ++User) {
DEBUG(dbgs() << "SLP: Checking user:" << **User << ".\n");
- bool Gathered = MustGather.count(*User);
-
// Skip in-tree scalars that become vectors.
- if (ScalarToTreeEntry.count(*User) && !Gathered) {
+ if (ScalarToTreeEntry.count(*User)) {
DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
**User << ".\n");
int Idx = ScalarToTreeEntry[*User]; (void) Idx;
assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
continue;
}
+ Instruction *UserInst = dyn_cast<Instruction>(*User);
+ if (!UserInst)
+ continue;
- if (!isa<Instruction>(*User))
+ // Ignore uses that are part of the reduction.
+ if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
continue;
DEBUG(dbgs() << "SLP: Need to extract:" << **User << " from lane " <<
@@ -574,6 +709,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
continue;
}
+ // This user is part of the reduction.
+ if (RdxOps && RdxOps->count(User))
+ continue;
+
// Make sure that we can schedule this unknown user.
BlockNumbering &BN = BlocksNumbers[BB];
int UserIndex = BN.getIndex(User);
@@ -635,6 +774,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
switch (Opcode) {
case Instruction::PHI: {
PHINode *PH = dyn_cast<PHINode>(VL0);
+
+ // Check for terminator values (e.g. invoke).
+ for (unsigned j = 0; j < VL.size(); ++j)
+ for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
+ TerminatorInst *Term = dyn_cast<TerminatorInst>(cast<PHINode>(VL[j])->getIncomingValue(i));
+ if (Term) {
+ DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
@@ -658,13 +809,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
case Instruction::Load: {
// Check if the loads are consecutive or if we need to swizzle them.
- for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
+ LoadInst *L = cast<LoadInst>(VL[i]);
+ if (!L->isSimple() || !isConsecutiveAccess(VL[i], VL[i + 1])) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Need to swizzle loads.\n");
return;
}
-
+ }
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of loads.\n");
return;
@@ -753,6 +905,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
+ // Sort operands of the instructions so that each side is more likely to
+ // have the same opcode.
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
+ ValueList Left, Right;
+ reorderInputsAccordingToOpcode(VL, Left, Right);
+ buildTree_rec(Left, Depth + 1);
+ buildTree_rec(Right, Depth + 1);
+ return;
+ }
+
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -874,9 +1036,24 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
} else {
- ScalarCost = VecTy->getNumElements() *
- TTI->getArithmeticInstrCost(Opcode, ScalarTy);
- VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
+ // Certain instructions can be cheaper to vectorize if they have a
+ // constant second vector operand.
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_UniformConstantValue;
+
+ // Check whether all second operands are constant.
+ for (unsigned i = 0; i < VL.size(); ++i)
+ if (!isa<ConstantInt>(cast<Instruction>(VL[i])->getOperand(1))) {
+ Op2VK = TargetTransformInfo::OK_AnyValue;
+ break;
+ }
+
+ ScalarCost =
+ VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK);
+ VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK);
}
return VecCost - ScalarCost;
}
@@ -884,14 +1061,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Cost of wide load - cost of scalar loads.
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+ int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
return VecLdCost - ScalarLdCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
int ScalarStCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
- int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
return VecStCost - ScalarStCost;
}
default:
@@ -899,19 +1076,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
}
+bool BoUpSLP::isFullyVectorizableTinyTree() {
+ DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
+ VectorizableTree.size() << " is fully vectorizable.\n");
+
+ // We only handle trees of height 2.
+ if (VectorizableTree.size() != 2)
+ return false;
+
+ // Gathering cost would be too much for tiny trees.
+ if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
+ return false;
+
+ return true;
+}
+
int BoUpSLP::getTreeCost() {
int Cost = 0;
DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
VectorizableTree.size() << ".\n");
- // Don't vectorize tiny trees. Small load/store chains or consecutive stores
- // of constants will be vectoried in SelectionDAG in MergeConsecutiveStores.
- // The SelectionDAG vectorizer can only handle pairs (trees of height = 2).
- if (VectorizableTree.size() < 3) {
+ // We only vectorize tiny trees if they are fully vectorizable.
+ if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
if (!VectorizableTree.size()) {
assert(!ExternalUses.size() && "We should not have any external users");
}
- return 0;
+ return INT_MAX;
}
unsigned BundleWidth = VectorizableTree[0].Scalars.size();
@@ -992,63 +1182,29 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
return false;
- // Calculate a constant offset from the base pointer without using SCEV
- // in the supported cases.
- // TODO: Add support for the case where one of the pointers is a GEP that
- // uses the other pointer.
- GetElementPtrInst *GepA = dyn_cast<GetElementPtrInst>(PtrA);
- GetElementPtrInst *GepB = dyn_cast<GetElementPtrInst>(PtrB);
-
- unsigned BW = DL->getPointerSizeInBits(ASA);
+ unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- int64_t Sz = DL->getTypeStoreSize(Ty);
+ APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
- // Check if PtrA is the base and PtrB is a constant offset.
- if (GepB && GepB->getPointerOperand() == PtrA) {
- APInt Offset(BW, 0);
- if (GepB->accumulateConstantOffset(*DL, Offset))
- return Offset.getSExtValue() == Sz;
- return false;
- }
+ APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
- // Check if PtrB is the base and PtrA is a constant offset.
- if (GepA && GepA->getPointerOperand() == PtrB) {
- APInt Offset(BW, 0);
- if (GepA->accumulateConstantOffset(*DL, Offset))
- return Offset.getSExtValue() == -Sz;
- return false;
- }
+ APInt OffsetDelta = OffsetB - OffsetA;
- // If both pointers are GEPs:
- if (GepA && GepB) {
- // Check that they have the same base pointer and number of indices.
- if (GepA->getPointerOperand() != GepB->getPointerOperand() ||
- GepA->getNumIndices() != GepB->getNumIndices())
- return false;
+ // Check if they are based on the same pointer. That makes the offsets
+ // sufficient.
+ if (PtrA == PtrB)
+ return OffsetDelta == Size;
- // Try to strip the geps. This makes SCEV faster.
- // Make sure that all of the indices except for the last are identical.
- int LastIdx = GepA->getNumIndices();
- for (int i = 0; i < LastIdx - 1; i++) {
- if (GepA->getOperand(i+1) != GepB->getOperand(i+1))
- return false;
- }
-
- PtrA = GepA->getOperand(LastIdx);
- PtrB = GepB->getOperand(LastIdx);
- Sz = 1;
- }
-
- ConstantInt *CA = dyn_cast<ConstantInt>(PtrA);
- ConstantInt *CB = dyn_cast<ConstantInt>(PtrB);
- if (CA && CB) {
- return (CA->getSExtValue() + Sz == CB->getSExtValue());
- }
+ // Compute the base pointer delta needed to make the final offset delta
+ // equal to the size.
+ APInt BaseDelta = Size - OffsetDelta;
- // Calculate the distance.
+ // Otherwise compute the distance with SCEV between the base pointers.
const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
- const SCEV *C = SE->getConstant(PtrSCEVA->getType(), Sz);
+ const SCEV *C = SE->getConstant(BaseDelta);
const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
return X == PtrSCEVB;
}
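// The APInt arithmetic above boils down to simple pointer algebra. With
// hypothetical byte offsets and a 4-byte element (Size = 4): PtrA = Base + 4
// and PtrB = Base + 8 give OffsetDelta = 4 == Size, i.e. consecutive. When
// the stripped base pointers differ, SCEV must prove the bases are exactly
// BaseDelta bytes apart. A standalone sketch of that arithmetic:
static bool consecutiveSketch(int64_t OffsetA, int64_t OffsetB, int64_t Size,
                              bool SameBase, int64_t BaseDistance) {
  int64_t OffsetDelta = OffsetB - OffsetA;
  if (SameBase)
    return OffsetDelta == Size;       // same base: offsets alone decide
  int64_t BaseDelta = Size - OffsetDelta;
  return BaseDistance == BaseDelta;   // distance SCEV would have to prove
}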
@@ -1102,6 +1258,15 @@ Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) {
return I;
}
+void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
+ Instruction *VL0 = cast<Instruction>(VL[0]);
+ Instruction *LastInst = getLastInstruction(VL);
+ BasicBlock::iterator NextInst = LastInst;
+ ++NextInst;
+ Builder.SetInsertPoint(VL0->getParent(), NextInst);
+ Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+}
+
Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
Value *Vec = UndefValue::get(Ty);
// Generate the 'InsertElement' instruction.
@@ -1109,6 +1274,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
GatherSeq.insert(Insrt);
+ CSEBlocks.insert(Insrt->getParent());
// Add to our 'need-to-extract' list.
if (ScalarToTreeEntry.count(VL[i])) {
@@ -1132,10 +1298,12 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
return Vec;
}
-Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) {
- if (ScalarToTreeEntry.count(VL[0])) {
- int Idx = ScalarToTreeEntry[VL[0]];
- TreeEntry *En = &VectorizableTree[Idx];
+Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
+ SmallDenseMap<Value*, int>::const_iterator Entry
+ = ScalarToTreeEntry.find(VL[0]);
+ if (Entry != ScalarToTreeEntry.end()) {
+ int Idx = Entry->second;
+ const TreeEntry *En = &VectorizableTree[Idx];
if (En->isSame(VL) && En->VectorizedValue)
return En->VectorizedValue;
}
@@ -1159,38 +1327,48 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
}
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
- BuilderLocGuard Guard(Builder);
+ IRBuilder<>::InsertPointGuard Guard(Builder);
if (E->VectorizedValue) {
DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
return E->VectorizedValue;
}
- Type *ScalarTy = E->Scalars[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(E->Scalars[0]))
+ Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
+ Type *ScalarTy = VL0->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
ScalarTy = SI->getValueOperand()->getType();
VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
if (E->NeedToGather) {
+ setInsertPointAfterBundle(E->Scalars);
return Gather(E->Scalars, VecTy);
}
- Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
unsigned Opcode = VL0->getOpcode();
assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode");
switch (Opcode) {
case Instruction::PHI: {
PHINode *PH = dyn_cast<PHINode>(VL0);
- Builder.SetInsertPoint(PH->getParent()->getFirstInsertionPt());
+ Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
E->VectorizedValue = NewPhi;
+ // PHINodes may have multiple entries from the same block. We want to
+ // visit every block once.
+ SmallSet<BasicBlock*, 4> VisitedBBs;
+
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
BasicBlock *IBB = PH->getIncomingBlock(i);
+ if (!VisitedBBs.insert(IBB)) {
+ NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
+ continue;
+ }
+
// Prepare the operand vector.
for (unsigned j = 0; j < E->Scalars.size(); ++j)
Operands.push_back(cast<PHINode>(E->Scalars[j])->
@@ -1231,8 +1409,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *InVec = vectorizeTree(INVL);
@@ -1252,8 +1429,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
}
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *L = vectorizeTree(LHSV);
Value *R = vectorizeTree(RHSV);
@@ -1279,8 +1455,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2));
}
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *Cond = vectorizeTree(CondVec);
Value *True = vectorizeTree(TrueVec);
@@ -1288,7 +1463,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (Value *V = alreadyVectorized(E->Scalars))
return V;
-
+
Value *V = Builder.CreateSelect(Cond, True, False);
E->VectorizedValue = V;
return V;
@@ -1312,13 +1487,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Or:
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
- LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
- }
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
+ reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
+ else
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ }
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *LHS = vectorizeTree(LHSVL);
Value *RHS = vectorizeTree(RHSVL);
@@ -1333,41 +1510,46 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
E->VectorizedValue = V;
+
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return propagateMetadata(I, E->Scalars);
+
return V;
}
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
LoadInst *LI = cast<LoadInst>(VL0);
- Value *VecPtr =
- Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo());
+ unsigned AS = LI->getPointerAddressSpace();
+
+ Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo(AS));
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
- return LI;
+ return propagateMetadata(LI, E->Scalars);
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(VL0);
unsigned Alignment = SI->getAlignment();
+ unsigned AS = SI->getPointerAddressSpace();
ValueList ValueOp;
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand());
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *VecValue = vectorizeTree(ValueOp);
- Value *VecPtr =
- Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo());
+ Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+ VecTy->getPointerTo(AS));
StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
S->setAlignment(Alignment);
E->VectorizedValue = S;
- return S;
+ return propagateMetadata(S, E->Scalars);
}
default:
llvm_unreachable("unknown inst");
@@ -1375,7 +1557,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return 0;
}
-void BoUpSLP::vectorizeTree() {
+Value *BoUpSLP::vectorizeTree() {
Builder.SetInsertPoint(F->getEntryBlock().begin());
vectorizeTree(&VectorizableTree[0]);
@@ -1407,6 +1589,7 @@ void BoUpSLP::vectorizeTree() {
if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(PN->getParent());
User->replaceUsesOfWith(Scalar, Ex);
} else if (isa<Instruction>(Vec)){
if (PHINode *PH = dyn_cast<PHINode>(User)) {
@@ -1414,17 +1597,20 @@ void BoUpSLP::vectorizeTree() {
if (PH->getIncomingValue(i) == Scalar) {
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(PH->getIncomingBlock(i));
PH->setOperand(i, Ex);
}
}
} else {
Builder.SetInsertPoint(cast<Instruction>(User));
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(cast<Instruction>(User)->getParent());
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
Builder.SetInsertPoint(F->getEntryBlock().begin());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(&F->getEntryBlock());
User->replaceUsesOfWith(Scalar, Ex);
}
@@ -1450,9 +1636,10 @@ void BoUpSLP::vectorizeTree() {
for (Value::use_iterator User = Scalar->use_begin(),
UE = Scalar->use_end(); User != UE; ++User) {
DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n");
- assert(!MustGather.count(*User) &&
- "Replacing gathered value with undef");
- assert(ScalarToTreeEntry.count(*User) &&
+
+ assert((ScalarToTreeEntry.count(*User) ||
+ // It is legal to replace the reduction users by undef.
+ (RdxOps && RdxOps->count(*User))) &&
"Replacing out-of-tree value with undef");
}
Value *Undef = UndefValue::get(Ty);
@@ -1467,8 +1654,20 @@ void BoUpSLP::vectorizeTree() {
BlocksNumbers[it].forget();
}
Builder.ClearInsertionPoint();
+
+ return VectorizableTree[0].VectorizedValue;
}
+class DTCmp {
+ const DominatorTree *DT;
+
+public:
+ DTCmp(const DominatorTree *DT) : DT(DT) {}
+ bool operator()(const BasicBlock *A, const BasicBlock *B) const {
+ return DT->properlyDominates(A, B);
+ }
+};
+
void BoUpSLP::optimizeGatherSequence() {
DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
<< " gather sequences instructions.\n");
@@ -1504,45 +1703,48 @@ void BoUpSLP::optimizeGatherSequence() {
Insert->moveBefore(PreHeader->getTerminator());
}
+ // Sort blocks by domination. This ensures we visit a block after all blocks
+ // dominating it are visited.
+ SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
+ std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT));
+
// Perform O(N^2) search over the gather sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
// instructions into different buckets based on the insert lane.
- SmallPtrSet<Instruction*, 16> Visited;
- SmallVector<Instruction*, 16> ToRemove;
- ReversePostOrderTraversal<Function*> RPOT(F);
- for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
- E = RPOT.end(); I != E; ++I) {
+ SmallVector<Instruction *, 16> Visited;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = CSEWorkList.begin(),
+ E = CSEWorkList.end();
+ I != E; ++I) {
+ assert((I == CSEWorkList.begin() || !DT->dominates(*I, *llvm::prior(I))) &&
+ "Worklist not sorted properly!");
BasicBlock *BB = *I;
- // For all instructions in the function:
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- Instruction *In = it;
- if ((!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) ||
- !GatherSeq.count(In))
+ // For all instructions in blocks containing gather sequences:
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
+ Instruction *In = it++;
+ if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
- for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
- ve = Visited.end(); v != ve; ++v) {
+ for (SmallVectorImpl<Instruction *>::iterator v = Visited.begin(),
+ ve = Visited.end();
+ v != ve; ++v) {
if (In->isIdenticalTo(*v) &&
DT->dominates((*v)->getParent(), In->getParent())) {
In->replaceAllUsesWith(*v);
- ToRemove.push_back(In);
+ In->eraseFromParent();
In = 0;
break;
}
}
- if (In)
- Visited.insert(In);
+ if (In) {
+ assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end());
+ Visited.push_back(In);
+ }
}
}
-
- // Erase all of the instructions that we RAUWed.
- for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(),
- ve = ToRemove.end(); v != ve; ++v) {
- assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses");
- (*v)->eraseFromParent();
- }
+ CSEBlocks.clear();
+ GatherSeq.clear();
}
/// The SLPVectorizer Pass.
@@ -1575,14 +1777,18 @@ struct SLPVectorizer : public FunctionPass {
StoreRefs.clear();
bool Changed = false;
+ // If the target claims to have no vector registers don't attempt
+ // vectorization.
+ if (!TTI->getNumberOfRegisters(true))
+ return false;
+
// Must have DataLayout. We can't require it because some tests run w/o
// triple.
if (!DL)
return false;
// Don't vectorize when the attribute NoImplicitFloat is used.
- if (F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat))
+ if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
@@ -1661,6 +1867,21 @@ private:
StoreListMap StoreRefs;
};
+/// \brief Check that the Values in the slice of the VL array still exist in
+/// the WeakVH array.
+/// Vectorization of part of the VL array may cause later values in the VL array
+/// to become invalid. We track when this has happened in the WeakVH array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL,
+ SmallVectorImpl<WeakVH> &VH,
+ unsigned SliceBegin,
+ unsigned SliceSize) {
+ for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
+ if (VH[i] != VL[i])
+ return true;
+
+ return false;
+}
+
bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
int CostThreshold, BoUpSLP &R) {
unsigned ChainLen = Chain.size();
@@ -1673,11 +1894,19 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
if (!isPowerOf2_32(Sz) || VF < 2)
return false;
+ // Keep track of values that were deleted by vectorizing in the loop below.
+ SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+
bool Changed = false;
// Look for profitable vectorizable trees at all offsets, starting at zero.
for (unsigned i = 0, e = ChainLen; i < e; ++i) {
if (i + VF > e)
break;
+
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
+ continue;
+
DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
<< "\n");
ArrayRef<Value *> Operands = Chain.slice(i, VF);
@@ -1697,7 +1926,7 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
}
}
- return Changed;
+ return Changed;
}
bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
@@ -1764,15 +1993,17 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
if (!SI)
continue;
+ // Don't touch volatile stores.
+ if (!SI->isSimple())
+ continue;
+
// Check that the pointer points to scalars.
Type *Ty = SI->getValueOperand()->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
return 0;
- // Find the base of the GEP.
- Value *Ptr = SI->getPointerOperand();
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
- Ptr = GEP->getPointerOperand();
+ // Find the base pointer.
+ Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
// Save the store locations.
StoreRefs[Ptr].push_back(SI);
@@ -1797,28 +2028,61 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
// Check that all of the parts are scalar instructions of the same type.
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
- return 0;
+ return false;
unsigned Opcode0 = I0->getOpcode();
+ Type *Ty0 = I0->getType();
+ unsigned Sz = DL->getTypeSizeInBits(Ty0);
+ unsigned VF = MinVecRegSize / Sz;
+
for (int i = 0, e = VL.size(); i < e; ++i) {
Type *Ty = VL[i]->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
- return 0;
+ return false;
Instruction *Inst = dyn_cast<Instruction>(VL[i]);
if (!Inst || Inst->getOpcode() != Opcode0)
- return 0;
+ return false;
}
- R.buildTree(VL);
- int Cost = R.getTreeCost();
+ bool Changed = false;
- if (Cost >= -SLPCostThreshold)
- return false;
+ // Keep track of values that were deleted by vectorizing in the loop below.
+ SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
- DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n");
- R.vectorizeTree();
- return true;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ unsigned OpsWidth = 0;
+
+ if (i + VF > e)
+ OpsWidth = e - i;
+ else
+ OpsWidth = VF;
+
+ if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+ break;
+
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
+ continue;
+
+ DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+ << "\n");
+ ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
+
+ R.buildTree(Ops);
+ int Cost = R.getTreeCost();
+
+ if (Cost < -SLPCostThreshold) {
+ DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n");
+ R.vectorizeTree();
+
+ // Move to the next bundle.
+ i += VF - 1;
+ Changed = true;
+ }
+ }
+
+ return Changed;
}
bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
@@ -1861,30 +2125,405 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
return 0;
}
+/// \brief Generate a shuffle mask to be used in a reduction tree.
+///
+/// \param VecLen The length of the vector to be reduced.
+/// \param NumEltsToRdx The number of elements that should be reduced in the
+/// vector.
+/// \param IsPairwise Whether the reduction is a pairwise or splitting
+/// reduction. A pairwise reduction will generate a mask of
+/// <0,2,...> or <1,3,..> while a splitting reduction will generate
+/// <2,3,undef,undef> for a vector of 4 and NumEltsToRdx = 2.
+/// \param IsLeft True will generate a mask of even elements, odd otherwise.
+static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
+ bool IsPairwise, bool IsLeft,
+ IRBuilder<> &Builder) {
+ assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
+
+ SmallVector<Constant *, 32> ShuffleMask(
+ VecLen, UndefValue::get(Builder.getInt32Ty()));
+
+ if (IsPairwise)
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
+ for (unsigned i = 0; i != NumEltsToRdx; ++i)
+ ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft);
+ else
+ // Move the upper half of the vector to the lower half.
+ for (unsigned i = 0; i != NumEltsToRdx; ++i)
+ ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i);
+
+ return ConstantVector::get(ShuffleMask);
+}
+
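// For a concrete view of the masks createRdxShuffleMask produces, here is a
// standalone sketch using -1 in place of undef. With VecLen = 8 and
// NumEltsToRdx = 4 it yields <0,2,4,6,-1,...> (pairwise/left),
// <1,3,5,7,-1,...> (pairwise/right) and <4,5,6,7,-1,...> (splitting):
static SmallVector<int, 8> rdxMaskSketch(unsigned VecLen, unsigned NumEltsToRdx,
                                         bool IsPairwise, bool IsLeft) {
  SmallVector<int, 8> Mask(VecLen, -1);
  for (unsigned i = 0; i != NumEltsToRdx; ++i)
    // Pairwise picks the even (left) or odd (right) lanes; splitting picks
    // the upper half of the vector.
    Mask[i] = IsPairwise ? int(2 * i + !IsLeft) : int(NumEltsToRdx + i);
  return Mask;
}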
+
+/// Model horizontal reductions.
+///
+/// A horizontal reduction is a tree of reduction operations (currently add and
+/// fadd) whose leaves are operations that can be put into a vector.
+/// For example, this tree:
+///
+/// mul mul mul mul
+/// \ / \ /
+/// + +
+/// \ /
+/// +
+/// This tree has "mul" as its reduced values and "+" as its reduction
+/// operations. A reduction might be feeding into a store or a binary operation
+/// feeding a phi.
+/// ...
+/// \ /
+/// +
+/// |
+/// phi +=
+///
+/// Or:
+/// ...
+/// \ /
+/// +
+/// |
+/// *p =
+///
+class HorizontalReduction {
+ SmallPtrSet<Value *, 16> ReductionOps;
+ SmallVector<Value *, 32> ReducedVals;
+
+ BinaryOperator *ReductionRoot;
+ PHINode *ReductionPHI;
+
+ /// The opcode of the reduction.
+ unsigned ReductionOpcode;
+ /// The opcode of the values we perform a reduction on.
+ unsigned ReducedValueOpcode;
+ /// The width of one full horizontal reduction operation.
+ unsigned ReduxWidth;
+ /// Should we model this reduction as a pairwise reduction tree or a tree that
+ /// splits the vector in halves and adds those halves.
+ bool IsPairwiseReduction;
+
+public:
+ HorizontalReduction()
+ : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0),
+ ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
+
+ /// \brief Try to find a reduction tree.
+ bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
+ DataLayout *DL) {
+ assert((!Phi ||
+ std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
+ "Thi phi needs to use the binary operator");
+
+ // We could have an initial reduction that is not an add.
+ // r *= v1 + v2 + v3 + v4
+ // In such a case start looking for a tree rooted in the first '+'.
+ if (Phi) {
+ if (B->getOperand(0) == Phi) {
+ Phi = 0;
+ B = dyn_cast<BinaryOperator>(B->getOperand(1));
+ } else if (B->getOperand(1) == Phi) {
+ Phi = 0;
+ B = dyn_cast<BinaryOperator>(B->getOperand(0));
+ }
+ }
+
+ if (!B)
+ return false;
+
+ Type *Ty = B->getType();
+ if (Ty->isVectorTy())
+ return false;
+
+ ReductionOpcode = B->getOpcode();
+ ReducedValueOpcode = 0;
+ ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+ ReductionRoot = B;
+ ReductionPHI = Phi;
+
+ if (ReduxWidth < 4)
+ return false;
+
+ // We currently only support adds.
+ if (ReductionOpcode != Instruction::Add &&
+ ReductionOpcode != Instruction::FAdd)
+ return false;
+
+ // Post-order traverse the reduction tree starting at B. We only handle true
+ // trees containing only binary operators.
+ SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
+ Stack.push_back(std::make_pair(B, 0));
+ while (!Stack.empty()) {
+ BinaryOperator *TreeN = Stack.back().first;
+ unsigned EdgeToVisit = Stack.back().second++;
+ bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
+
+ // Only handle trees in the current basic block.
+ if (TreeN->getParent() != B->getParent())
+ return false;
+
+ // Each tree node needs to have one user except for the ultimate
+ // reduction.
+ if (!TreeN->hasOneUse() && TreeN != B)
+ return false;
+
+ // Post-order visit.
+ if (EdgeToVisit == 2 || IsReducedValue) {
+ if (IsReducedValue) {
+ // Make sure that the opcodes of the operations that we are going to
+ // reduce match.
+ if (!ReducedValueOpcode)
+ ReducedValueOpcode = TreeN->getOpcode();
+ else if (ReducedValueOpcode != TreeN->getOpcode())
+ return false;
+ ReducedVals.push_back(TreeN);
+ } else {
+ // We need to be able to reassociate the adds.
+ if (!TreeN->isAssociative())
+ return false;
+ ReductionOps.insert(TreeN);
+ }
+ // Retract.
+ Stack.pop_back();
+ continue;
+ }
+
+ // Visit left or right.
+ Value *NextV = TreeN->getOperand(EdgeToVisit);
+ BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
+ if (Next)
+ Stack.push_back(std::make_pair(Next, 0));
+ else if (NextV != Phi)
+ return false;
+ }
+ return true;
+ }
+
+ /// \brief Attempt to vectorize the tree found by
+ /// matchAssociativeReduction.
+ bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+ if (ReducedVals.empty())
+ return false;
+
+ unsigned NumReducedVals = ReducedVals.size();
+ if (NumReducedVals < ReduxWidth)
+ return false;
+
+ Value *VectorizedTree = 0;
+ IRBuilder<> Builder(ReductionRoot);
+ FastMathFlags Unsafe;
+ Unsafe.setUnsafeAlgebra();
+ Builder.SetFastMathFlags(Unsafe);
+ unsigned i = 0;
+
+ for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
+ ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
+ V.buildTree(ValsToReduce, &ReductionOps);
+
+ // Estimate cost.
+ int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
+ if (Cost >= -SLPCostThreshold)
+ break;
+
+ DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
+ << ". (HorRdx)\n");
+
+ // Vectorize a tree.
+ DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
+ Value *VectorizedRoot = V.vectorizeTree();
+
+ // Emit a reduction.
+ Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
+ if (VectorizedTree) {
+ Builder.SetCurrentDebugLocation(Loc);
+ VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
+ ReducedSubTree, "bin.rdx");
+ } else
+ VectorizedTree = ReducedSubTree;
+ }
+
+ if (VectorizedTree) {
+ // Finish the reduction.
+ for (; i < NumReducedVals; ++i) {
+ Builder.SetCurrentDebugLocation(
+ cast<Instruction>(ReducedVals[i])->getDebugLoc());
+ VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
+ ReducedVals[i]);
+ }
+ // Update users.
+ if (ReductionPHI) {
+ assert(ReductionRoot != NULL && "Need a reduction operation");
+ ReductionRoot->setOperand(0, VectorizedTree);
+ ReductionRoot->setOperand(1, ReductionPHI);
+ } else
+ ReductionRoot->replaceAllUsesWith(VectorizedTree);
+ }
+ return VectorizedTree != 0;
+ }
+
+private:
+
+ /// \brief Calculate the cost of a reduction.
+ int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
+ Type *ScalarTy = FirstReducedVal->getType();
+ Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
+
+ int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true);
+ int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false);
+
+ IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
+ int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
+
+ int ScalarReduxCost =
+ ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy);
+
+ DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
+ << " for reduction that starts with " << *FirstReducedVal
+ << " (It is a "
+ << (IsPairwiseReduction ? "pairwise" : "splitting")
+ << " reduction)\n");
+
+ return VecReduxCost - ScalarReduxCost;
+ }
+
+ static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L,
+ Value *R, const Twine &Name = "") {
+ if (Opcode == Instruction::FAdd)
+ return Builder.CreateFAdd(L, R, Name);
+ return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name);
+ }
+
+ /// \brief Emit a horizontal reduction of the vectorized value.
+ Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) {
+ assert(VectorizedValue && "Need to have a vectorized tree node");
+ Instruction *ValToReduce = dyn_cast<Instruction>(VectorizedValue);
+ assert(isPowerOf2_32(ReduxWidth) &&
+ "We only handle power-of-two reductions for now");
+
+ Value *TmpVec = ValToReduce;
+ for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
+ if (IsPairwiseReduction) {
+ Value *LeftMask =
+ createRdxShuffleMask(ReduxWidth, i, true, true, Builder);
+ Value *RightMask =
+ createRdxShuffleMask(ReduxWidth, i, true, false, Builder);
+
+ Value *LeftShuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l");
+ Value *RightShuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), (RightMask),
+ "rdx.shuf.r");
+ TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf,
+ "bin.rdx");
+ } else {
+ Value *UpperHalf =
+ createRdxShuffleMask(ReduxWidth, i, false, false, Builder);
+ Value *Shuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf");
+ TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx");
+ }
+ }
+
+ // The result is in the first element of the vector.
+ return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+ }
+};
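// emitReduction() above performs a log2(ReduxWidth) shuffle-and-add schedule.
// A scalar simulation of the splitting variant, assuming a power-of-two
// width, shows why the final value ends up in lane 0:
static double splittingReduceSketch(double Lanes[], unsigned Width) {
  for (unsigned Half = Width / 2; Half != 0; Half /= 2)
    for (unsigned i = 0; i != Half; ++i)
      Lanes[i] += Lanes[i + Half]; // fold the upper half onto the lower half
  return Lanes[0];
}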
+
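// matchAssociativeReduction() above walks the reduction tree with an explicit
// stack and a per-node edge counter; this is a standard iterative post-order
// traversal. A generic sketch over a hypothetical binary node type:
struct BinNodeSketch { BinNodeSketch *Kids[2]; };
template <typename Fn>
static void postorderSketch(BinNodeSketch *Root, Fn Visit) {
  if (!Root)
    return;
  SmallVector<std::pair<BinNodeSketch *, unsigned>, 32> Stack;
  Stack.push_back(std::make_pair(Root, 0u));
  while (!Stack.empty()) {
    BinNodeSketch *N = Stack.back().first;
    unsigned Edge = Stack.back().second++;
    if (Edge == 2) {     // both children visited: emit the node and retract
      Visit(N);
      Stack.pop_back();
      continue;
    }
    if (BinNodeSketch *Kid = N->Kids[Edge])
      Stack.push_back(std::make_pair(Kid, 0u));
  }
}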
+/// \brief Recognize construction of vectors like
+/// %ra = insertelement <4 x float> undef, float %s0, i32 0
+/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
+/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
+/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
+///
+/// Returns true if it matches
+///
+static bool findBuildVector(InsertElementInst *IE,
+ SmallVectorImpl<Value *> &Ops) {
+ if (!isa<UndefValue>(IE->getOperand(0)))
+ return false;
+
+ while (true) {
+ Ops.push_back(IE->getOperand(1));
+
+ if (IE->use_empty())
+ return false;
+
+ InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->use_back());
+ if (!NextUse)
+ return true;
+
+ // If this isn't the final use, make sure the next insertelement is the only
+ // use. It's OK if the final constructed vector is used multiple times.
+ if (!IE->hasOneUse())
+ return false;
+
+ IE = NextUse;
+ }
+
+ return false;
+}
+
+static bool PhiTypeSorterFunc(Value *V, Value *V2) {
+ return V->getType() < V2->getType();
+}
+
bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
- // Collect the incoming values from the PHIs.
- for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
- ++instr) {
- PHINode *P = dyn_cast<PHINode>(instr);
-
- if (!P)
- break;
+ SmallSet<Value *, 16> VisitedInstrs;
+
+ bool HaveVectorizedPhiNodes = true;
+ while (HaveVectorizedPhiNodes) {
+ HaveVectorizedPhiNodes = false;
+
+ // Collect the incoming values from the PHIs.
+ Incoming.clear();
+ for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
+ ++instr) {
+ PHINode *P = dyn_cast<PHINode>(instr);
+ if (!P)
+ break;
- // Stop constructing the list when you reach a different type.
- if (Incoming.size() && P->getType() != Incoming[0]->getType()) {
- Changed |= tryToVectorizeList(Incoming, R);
- Incoming.clear();
+ if (!VisitedInstrs.count(P))
+ Incoming.push_back(P);
}
- Incoming.push_back(P);
+ // Sort by type.
+ std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);
+
+ // Try to vectorize elements based on their type.
+ for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
+ E = Incoming.end();
+ IncIt != E;) {
+
+ // Look for the next elements with the same type.
+ SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
+ while (SameTypeIt != E &&
+ (*SameTypeIt)->getType() == (*IncIt)->getType()) {
+ VisitedInstrs.insert(*SameTypeIt);
+ ++SameTypeIt;
+ }
+
+ // Try to vectorize them.
+ unsigned NumElts = (SameTypeIt - IncIt);
+ DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
+ if (NumElts > 1 &&
+ tryToVectorizeList(ArrayRef<Value *>(IncIt, NumElts), R)) {
+ // Success: start over because instructions might have been changed.
+ HaveVectorizedPhiNodes = true;
+ Changed = true;
+ break;
+ }
+
+ // Start over at the next instruction of a different type (or the end).
+ IncIt = SameTypeIt;
+ }
}
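// The loop above uses a common sort-then-group idiom: stable-sort the PHIs by
// type, then hand each maximal run of equal-typed values to the vectorizer.
// A generic sketch of that run-walking pattern (names are illustrative only):
template <typename Iter, typename SamePred, typename VisitFn>
static void forEachRunSketch(Iter Begin, Iter End, SamePred Same, VisitFn Visit) {
  while (Begin != End) {
    Iter RunEnd = Begin;
    while (RunEnd != End && Same(*Begin, *RunEnd))
      ++RunEnd;            // extend the run while the grouping key matches
    Visit(Begin, RunEnd);  // process one maximal run, e.g. tryToVectorizeList
    Begin = RunEnd;        // continue with the next group
  }
}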
- if (Incoming.size() > 1)
- Changed |= tryToVectorizeList(Incoming, R);
+ VisitedInstrs.clear();
+
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
+ // We may go through BB multiple times so skip the ones we have already checked.
+ if (!VisitedInstrs.insert(it))
+ continue;
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
if (isa<DbgInfoIntrinsic>(it))
continue;
@@ -1902,24 +2541,86 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (!BI)
continue;
- Value *Inst = BI->getOperand(0);
+ // Try to match and vectorize a horizontal reduction.
+ HorizontalReduction HorRdx;
+ if (ShouldVectorizeHor &&
+ HorRdx.matchAssociativeReduction(P, BI, DL) &&
+ HorRdx.tryToReduce(R, TTI)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
+ Value *Inst = BI->getOperand(0);
if (Inst == P)
Inst = BI->getOperand(1);
- Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+ if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
continue;
}
+ // Try to vectorize horizontal reductions feeding into a store.
+ if (ShouldStartVectorizeHorAtStore)
+ if (StoreInst *SI = dyn_cast<StoreInst>(it))
+ if (BinaryOperator *BinOp =
+ dyn_cast<BinaryOperator>(SI->getValueOperand())) {
+ HorizontalReduction HorRdx;
+ if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+ HorRdx.tryToReduce(R, TTI)) ||
+ tryToVectorize(BinOp, R))) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+ }
+
// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
- Changed |= true;
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
+ if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ it = BB->begin();
+ e = BB->end();
+ }
+ }
+ }
+ continue;
+ }
+
+ // Try to vectorize trees that start at insertelement instructions.
+ if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
+ SmallVector<Value *, 8> Ops;
+ if (!findBuildVector(IE, Ops))
continue;
+
+ if (tryToVectorizeList(Ops, R)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
}
- for (int i = 0; i < 2; ++i)
- if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
- Changed |=
- tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
+
continue;
}
}